#! /usr/bin/perl

=for comment

Copyright 2021 Paul C. Leyland

Redistribution and use in source and binary forms, with or without modification, are permitted provided that
the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.

=cut

use strict;
use warnings;
use utf8;
use LWP::Simple;
use HTML::TokeParser;
use Astro::Catalog::Query::Vizier;

# Force everything to be UTF8 encoded.
binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

my $Version = '1.00';

=for comment

This script builds comparisons files for a range of objects of interest to photometrists, including observers
of known variable stars, stars hosting exoplanets, transient objects such as newly discovered novae and
supernovae, and moving objects such as planetary satellites, asteroids, TNO and other small solar system
bodies. The files are human readable and designed to be easily parsed by programs. The format is as follows:

The first line contains the object name.
The second line contains AAVSO's sequence name.
The third line contains the filter name.

Each subsequent line contains the RA, Dec, AAVSO comparison name (with clashes distinguished by a subscript),
the magnitude in that filter, and its error, each value separated by a TAB character. If the error is not
given by AAVSO a value of 9.999 magnitudes is inserted --- big enough to render that comparison harmless if
ensemble photometry is to be used.

As of Version 1.00 comparison magnitudes are taken only from AAVSO's VSP facility and in B, V and R filters
only. Forthcoming versions may also interrogate the latest Gaia catalogue for G, BP and RB magnitudes.

Two types of objects are known to the script and explicit coordinates of them are not required to be provided
on the command line, though there is nothing to stop you from giving them if you wish. The first type is
variable stars known to AAVSO, an example is "AE And" --- note that spaces in the name must be quoted as here,
otherwise the argument will be interpreted as two or more. The second type is for Gaia alerts for objects such
as gravitational lensing candidates; these have names similar to Gaia21dnc. Finally, an object can be given as
an arbitrary name (such as "My new discovery") but in that case J2000 RA and Dec coordinates in hexadecimal
format must be specified.

The process below splits naturally into three phases:

1) Determine the RA and Dec coordinates of the object.

2) Go to aavso.org and grab the HTML for CCD B, V and R photometry for <variable> with field of 0
   arcmin. That gives the positions of the VS and its sequence, together with the magnitudes of the latter.

3) Build classic sequence-files <variable>_<filter>.cmp files.

=cut

my %variable;			# Information about the variables in the field.
my (%sequence, $seq_name);	# Variable and comparison sequence data from AAVSO.
my %stars;			# All stars returned from Gaia DR2 search.

# A couple of utility routines.

sub dec2deg($)			# Convert Declination in ':'-separated format into decimal degrees.
{
    my ($sign, $d, $m, $s) = $_[0] =~ /([-+]?)(\d\d):(\d\d):(\d\d(\.\d*)?)/;
    defined $d && defined $m && defined $s or return undef;
    my $degrees = $d + $m/60.0 + $s/3600.0;
    defined $sign and $sign eq '-' and return -$degrees;
    return $degrees;
}
sub ra2deg($)			# Convert RA in ':'-separated format into decimal degrees.
{
    my $retval = dec2deg($_[0]);
    $retval *= 15.0 if defined $retval;
    return $retval
}

# Process command line arguments.

my ($var_name, $RA, $Dec, $RA_deg, $Dec_deg);
my $usage = "Usage: $0 variable_name [hh:mm:ss[.s] [+/-]ddd:mm:ss[.ss] ]\n";

if ($#ARGV == 0 && $ARGV[0] eq '-h') {
    printf $usage;
    exit
}

if ($#ARGV == 0 or $#ARGV == 2) { ($var_name, $RA, $Dec) = @ARGV } else { die $usage }

#############
### Phase 1 #
#############

our $Doris = 'oats';		# You are not expected to understand this.

=for comment

The variable name can be a valid entry in AAVSO's VSP database, a Gaia alert of the form
Gaia\d\d[a-z][a-z][a-z] or an arbitrary string. If the sky coordinate is given it overrides any catalogued
position; it must be present if the variable can not be found in either catalogue.

The easiest form to process is when the optional sky position has been given because we can treat $var_name as
an arbitrary label and just dig out the APASS catalogue details. Otherwise we need to discover the position
and bail out if it can not be found.

First validate the position, if given. Easiest way is to see whether the conversion routines return a value in
the range 0 through 360. If not found, try the Gaia alerts and AAVSO VSP in turn.

=cut

my ($doc, $parser, $para);

if (defined $RA) {
    defined $Dec or die $usage;
    $RA_deg = ra2deg ($RA);
    defined $RA_deg && $RA_deg >= 0.0 && $RA_deg < 360.0 or
	die "Bad value of RA (should be hh:mm:ss[.ss]) '$RA'\n";
    $Dec_deg = dec2deg ($Dec);
    defined $Dec_deg && $Dec_deg >= -90.0 && $Dec_deg <= 90.0 or
	die "Bad value of Dec (should be [+/-]ddd:mm:ss[.s]) '$Dec'\n";
} elsif ($var_name =~ /^Gaia\d\d[a-z][a-z][a-z]$/) {
    $doc = LWP::Simple::get ("http://gsaweb.ast.cam.ac.uk/alerts/alert/$var_name/");

# This next is gruesome but the returned page is a mess of scripts, etc, and hard to parse.

    ($RA_deg, $Dec_deg) = $doc =~ /A\.aladin.*?target:.*?(\d+\.\d+).*?([-+]?\d+\.\d+)/;
    defined $RA_deg && defined $Dec_deg
	or die "Can't find coordinates of $var_name in Gaia alerts page. Try giving coordinates explicitly.\n";
} else {					# Ask AAVSO VSP for the position of the variable.
    $var_name =~ s/ /+/g;			# For URL protection
    $doc = LWP::Simple::get ("https://www.aavso.org/apps/vsp/photometry/?" .
				"fov=60.0&star=$var_name&maglimit=21.5&type=photometry");
    $doc =~ /could not be found in our database/
	and die "Can't find coordinates of $ARGV[0] in AAVSO. Try giving coordinates explicitly.\n";
    $parser = HTML::TokeParser->new(\$doc);

# There are only three paragraphs in the returned page. The first reports the name of the VS which is already
# known. The second holds the field size and position, the latter of which is of immediate interest.

    $parser->get_tag('p');			# Skip to first <p>
    $parser->get_tag('p');			# Skip to second <p>
    $para = $parser->get_text('/p');		# Get the text within the paragraph.
    ($RA_deg, $Dec_deg) = $para =~ /.*?\[([-.\d]*).*?\n.*?\[([-.\d]*)/;
}

# Now have position in all three cases so ask VSP for the photometry data.

my $harmless = 9.999;				# Harmless value to be used if the error is not given.
my $id_subscript = 1;				# Sometimes an id has multiple entries.

$doc = LWP::Simple::get ("https://www.aavso.org/apps/vsp/photometry/?" .
			 "fov=60.0&ra=$RA_deg&dec=$Dec_deg&maglimit=21.5&B=on&Rc=on&type=photometry");

$doc =~ /No comparison stars were found in this field/ and die "No comparisons!\n";

$parser = HTML::TokeParser->new(\$doc);
						# The fourth paragraph holds the sequence name.
$parser->get_tag('p');				# Skip to first <p>
$parser->get_tag('p');				# Skip to second <p>
$parser->get_tag('p');				# Skip to third <p>
$parser->get_tag('p');				# Skip to fourth <p>
$para = $parser->get_text('/p');		# Get the text within the paragraph.
($seq_name) = $para =~ /sequence as (\S*)/;

$parser->get_tag('tbody');			# Skip to the photometry table.
my $tag = $parser->get_tag('tr');		# Go to first row of table
$parser->get_tag('td');				# Go to first datum in row.

while ($tag->[0] ne '/tbody') {			# Parse complete from the HTML
    my (@row, $mag, $err);			# @row is (AUID, RA, Dec Label B V B-V Rc comments)
    
    while ($tag->[0] ne '/tr') {		# Parse the current row from the HTML
	push @row, $parser->get_trimmed_text('/td');
	$tag = $parser->get_tag('td', '/tr');
    }
    my $id = $row[3];
    if (exists $sequence{$id}) {		# Already seen?
	$id = "${id}_$id_subscript";		# Make unique.
	$id_subscript++
    } else {
	$id_subscript = 1
    }
    ($sequence{$id}{RA})  = $row[1] =~ /^([012]\d:\d\d:\d\d\.\d+)/;
    ($sequence{$id}{Dec}) = $row[2] =~ /^([-+]?\d\d:\d\d:\d\d\.\d+)/;

    ($mag) = $row[4] =~ /^(-?\d+\.\d+)/;
    if (defined $mag) {
	($sequence{$id}{Bmag}) = $mag;
	($err) = $row[4] =~ /\((\d+\.\d+)\)/;
	$err or $err = $harmless;
	$sequence{$id}{B_err} = $err;
    }

    ($sequence{$id}{Vmag}) = $row[5] =~ /^(-?\d+\.\d+)/;	# Guaranteed by AAVSO to be present.
    ($sequence{$id}{V_err}) = $row[5] =~ /\((\d+\.\d+)\)/;
    defined $sequence{$id}{V_err} or $sequence{$id}{V_err} = $harmless;

    ($mag) = $row[7] =~ /^(-?\d+\.\d+)/;
    if (defined $mag) {
	$sequence{$id}{Rmag} = $mag;
	($err) = $row[7] =~ /\((\d+\.\d+)\)/;
	$err or $err = $harmless;
	$sequence{$id}{R_err} = $err;
    }
    $tag = $parser->get_tag('td', '/tbody');
}

#############
### Phase 3 #
#############

my %filters_present;
foreach my $filter ('V', 'B', 'R') {
    while (my ($name, $data) = each %sequence) {
	$data->{"${filter}mag"} or next;		# No magnitude data in this filter
	$filters_present{$filter} = 1;			# But there is in this one.
    }
}

(my $file_name = $ARGV[0]) =~ s/ /_/g;			# Escape white_space. # 
foreach my $filter (keys %filters_present) {
    open CMP, "> ${file_name}_$filter.cmp" or die "Can't create cmp file for $filter\n";
    print CMP "$ARGV[0]\n$seq_name\n$filter\n";
    my ($mag, $err) = ("${filter}mag", "${filter}_err");
    foreach my $c (sort keys %sequence) {
	exists $sequence{$c}{$mag} and
	    print CMP "$sequence{$c}{RA}\t$sequence{$c}{Dec}\t$c\t$sequence{$c}{$mag}\t($sequence{$c}{$err})\n";
    }
    close CMP;
}

