#!/usr/bin/perl -s

# The following documentation is formatted to be read by running
# unix> perldoc wdsToSql.pl

=pod

=head1 Description

This is the script wdsToSql.pl that translates ASCII files representations of
the Washington Double Star Catalog (WDS) into a SQL command sequence which
can be fed into a SQL
server (MySQL, sqlite, ...) to simplify selections.

=head1 Syntax

 wdsToSql.pl [-o]

 perldoc wdsToSql.pl

=head1 Options

The only available option is B<-o> which must be used to tell the program
that the standard input is the B<o>rbital catalog, not the double star catalog.


=head1 Usage

=over 4

=item 1.

Create the SQL command file (here: C<wds.sql> or C<orb6.sql> respectively)
by piping all available WDS text
files with the "new" 130 or 264 bytes/row format through this shell script here. This
creates a much bigger file than the original files combined. The switch -o must be used
in case this is the orbital catalog:
 
 unix>	cat wdsnew*.txt | wdsToSql.pl > wds.sql
 unix>	wdsToSql.pl -o < orb6orbits.txt > orb6.sql

=item 2.

Start the database server (look for mysqld for example on Linux systems),
if needed:

  unix>  sqlite3

=item 3.

execute the previously generated file(s) as commands

 sqlite> .read wds.sql
 sqlite> .read orb6.sql

=item 4.

use SQL commands to select data from the database. Examples are

=over 2

=item *

select all WDS entries where the denomination starts with two zeros followed
by 10 and any other trailing letters:

  sqlite> SELECT den FROM wds WHERE den LIKE '0010%';

=item *

select all WDS entries which have more than two component:

  sqlite> SELECT den FROM wds WHERE comp NOT LIKE '     ';
  sqlite> SELECT * FROM wds WHERE comp NOT LIKE '     ';

=item *

show all WDS denominations with proper motions of the primay less than 2
(in the original units)

  sqlite> SELECT den FROM wds WHERE pm1ra < 2 AND pm1dec < 2;
  sqlite> SELECT * FROM wds WHERE pm1ra < 2 AND pm1dec < 2;

=item *

show all WDS denominations with absolute values of the proper motions of the
primay less than 2 (in the original units), probably closer to
what one wants to get instead of example c:

  sqlite> SELECT * FROM wds WHERE ABS(pm1ra) < 2 AND ABS(pm1dec) < 2;

=item *

show the entire WDS DB

  sqlite> SELECT * FROM wds;

=item *

show all WDS stars observed after the year 2000

  sqlite> SELECT * FROM wds WHERE datelst > 2000;

=item *

show all WDS stars observed after the year 2000 with spectral type A:

  sqlite> SELECT * FROM wds WHERE datelst > 2000 AND styp LIKE 'A%';

=item *

show all WDS stars observed after the year 2000 with spectral type A or B:

  sqlite> SELECT * FROM wds WHERE datelst > 2000 AND styp LIKE 'A%' OR styp LIKE 'B%';

=item *

show all orbits with best grade and periods less than 200 days

  sqlite> SELECT * FROM orb6 WHERE grade = 1 AND P < 200 AND Pu = 'd';

=item *

show all WDS entries with a magnitude of the first component (numerically)
smaller than 10

  sqlite> SELECT * FROM wds WHERE magfst < 10;

=item *

list star separations of the WDS entries with a magnitude of the first component (numerically)
smaller than 10

  sqlite> SELECT sepfst FROM wds WHERE magfst < 10;

list star separations of the WDS entries with an infrared magnitude of the first component (numerically)
smaller than 11

  sqlite> SELECT sepfst FROM wds WHERE note LIKE '%K%' AND magfst < 11;

count entries within the WDS with an infrared magnitude of the first component (numerically)
smaller than 11

  sqlite> SELECT COUNT(sepfst) FROM wds WHERE note LIKE '%K%' AND magfst < 11;

=item *
show duration and unit for entries in the orbital catalog with magnitude
brighter than 10 and periods between half a year and 4 years

  sqlite> SELECT P,Pu FROM orb6 WHERE magfst < '10' AND ( Pu='y' AND P<'4' AND P>'0.5' OR Pu='d' AND P>'182' AND P<'1460');

=item *

dump star separations of the WDS entries with a magnitude of the first component (numerically)
smaller than 10 into the file wdstmp

  sqlite> .output wdstmp
  sqlite> SELECT sepfst FROM wds WHERE magfst < 10;
  sqlite> .output stdout

=back

=item 5.

Quit the server

  sqlite>  .quit

=back

=head1 See also

 mysql(1) sqlite3(1)

 http://ad.usno.navy.mil/wds/orb6/newformat.html
 http://ad.usno.navy.mil/wds/wdsweb_format.txt

 http://aam.ugpl.de/node/236
 http://dev.mysql.com/doc/
 http://sourceforge.net/search/?type=soft&exact=0&q=mysql&offset=25
 http://www.geocities.com/SiliconValley/Vista/2207/sql1.html
 http://www.1keydata.com/sql/sql.html
 http://www.dcs.napier.ac.uk/~andrew/sql/
 http://www.gnu.org/directory/postgresql.html
 http://www.gnu.org/directory/mysql.html
 http://www.sql-tutorial.net/

=head1 To do

The current conversion of the RA and DEC coordinates is done as if these
were text strings, which makes selection of sky sections awkward. These might
better be represented as (signed) floating point numbers in a +-DDMMSS.sss
and HHMMSS.sss format to allow use of the SQL greater-equal-less operators
instead of the "C<LIKE>" operators.


=head1 Author

Richard J. Mathar, http://www.strw.leidenuniv.nl/~mathar

=head1 History

 # who      when        what
 # mathar    2005-09-27    created
 # mathar    2005-09-28    corrected bug of dot delimiters (instead of comma) in DEC(..,..)
 # mathar    2005-11-07    corrected typo in the usage documentation

=cut


$col = 0 ;

if ( $o eq '1')
{
	# the case of handling the orbital catalog of visual binaries
	# see http://ad.usno.navy.mil/wds/orb6/newformat.html
	$db = 'orb6' ;
	# @n[$col] = 'ra' ;	@c[$col] = '1-9' ;	@f[$col] = 'char(9)' ;	$col++ ;
	# @n[$col] = 'dec' ;	@c[$col] = '10-18' ;	@f[$col] = 'char(9)' ;	$col++ ;
	@n[$col] = 'coo' ;	@c[$col] = '1-18' ;	@f[$col] = 'char(18)' ;	$col++ ;
	@n[$col] = 'den' ;	@c[$col] = '20-29' ;	@f[$col] = 'char(10)' ;	$pkey=$col ; $col++ ;
	@n[$col] = 'disc' ;	@c[$col] = '31-44' ;	@f[$col] = 'char(14)' ;	$col++ ;
	@n[$col] = 'ads' ;	@c[$col] = '46-50' ;	@f[$col] = 'INTEGER' ;	$col++ ;
	@n[$col] = 'hip' ;	@c[$col] = '52-57' ;	@f[$col] = 'INTEGER' ;	$col++ ;
	@n[$col] = 'magfstC' ;	@c[$col] = '66-66' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'magfst' ;	@c[$col] = '67-71' ;	@f[$col] = 'DEC(5,2)' ;	$col++ ;
	@n[$col] = 'magfstF' ;	@c[$col] = '72-72' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'magsndC' ;	@c[$col] = '73-73' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'magsnd' ;	@c[$col] = '74-78' ;	@f[$col] = 'DEC(5,2)' ;	$col++ ;
	@n[$col] = 'magsndF' ;	@c[$col] = '79-79' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'P' ;	@c[$col] = '82-92' ;	@f[$col] = 'DEC(11,6)' ;	$skey=$col;	$col++ ;
	@n[$col] = 'Pu' ;	@c[$col] = '93-93' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'a' ;	@c[$col] = '106-114' ;	@f[$col] = 'DEC(9,5)' ;	$col++ ;
	@n[$col] = 'au' ;	@c[$col] = '115-115' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'i' ;	@c[$col] = '126-133' ;	@f[$col] = 'DEC(8,4)' ;	$col++ ;
	@n[$col] = 'Omegg' ;	@c[$col] = '144-151' ;	@f[$col] = 'DEC(8,4)' ;	$col++ ;
	@n[$col] = 'T0' ;	@c[$col] = '163-174' ;	@f[$col] = 'DEC(12,6)' ;	$col++ ;
	@n[$col] = 'T0u' ;	@c[$col] = '175-175' ;	@f[$col] = 'char(1)' ;	$col++ ;
	@n[$col] = 'e' ;	@c[$col] = '188-195' ;	@f[$col] = 'DEC(8,6)' ;	$col++ ;
	@n[$col] = 'omeg' ;	@c[$col] = '206-213' ;	@f[$col] = 'DEC(8,4)' ;	$col++ ;
	@n[$col] = 'equin' ;	@c[$col] = '224-227' ;	@f[$col] = 'INTEGER' ;	$col++ ;
	@n[$col] = 'datelst' ;	@c[$col] = '229-232' ;	@f[$col] = 'INTEGER' ;	$col++ ;
	@n[$col] = 'grade' ;	@c[$col] = '234-234' ;	@f[$col] = 'INTEGER' ;	$tkey=$col ; $col++ ;
}
else
{
	# the case of handling the double star catalog
	# see http://ad.usno.navy.mil/wds/wdsweb_format.txt
	$db = 'wds' ;
	@n[$col] = 'den' ;	@c[$col] = '1-10' ;	@f[$col] = 'char(10)' ; $pkey=$col ;	$col++ ;
	@n[$col] = 'disc' ;	@c[$col] = '11-17' ;	@f[$col] = 'char(7)' ; $skey=$col ;	$col++ ;
	@n[$col] = 'comp' ;	@c[$col] = '18-22' ;	@f[$col] = 'char(5)' ; $tkey=$col ;	$col++ ;
	@n[$col] = 'datefst' ;	@c[$col] = '24-27' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'datelst' ;	@c[$col] = '29-32' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'nobs' ;	@c[$col] = '34-37' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'posfst' ;	@c[$col] = '39-41' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'poslst' ;	@c[$col] = '43-45' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'sepfst' ;	@c[$col] = '47-51' ;	@f[$col] = 'DEC(5,1)' ; $col++ ;
	@n[$col] = 'seplst' ;	@c[$col] = '53-57' ;	@f[$col] = 'DEC(5,1)' ; $col++ ;
	@n[$col] = 'magfst' ;	@c[$col] = '59-63' ;	@f[$col] = 'DEC(5,2)' ; $col++ ;
	@n[$col] = 'magsnd' ;	@c[$col] = '65-69' ;	@f[$col] = 'DEC(5,2)' ; $col++ ;
	@n[$col] = 'styp' ;	@c[$col] = '71-79' ;	@f[$col] = 'char(9)' ; $col++ ;
	@n[$col] = 'pm1ra' ;	@c[$col] = '81-84' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'pm1dec' ;	@c[$col] = '85-88' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'pm2ra' ;	@c[$col] = '90-93' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'pm2dec' ;	@c[$col] = '94-97' ;	@f[$col] = 'INTEGER' ; $col++ ;
	@n[$col] = 'dnum' ;	@c[$col] = '99-106' ;	@f[$col] = 'char(8)' ; $col++ ;
	@n[$col] = 'note' ;	@c[$col] = '108-111' ;	@f[$col] = 'char(4)' ; $col++ ;
	@n[$col] = 'coo' ;	@c[$col] = '113-130' ;	@f[$col] = 'char(18)' ; $col++ ;
}

print 'CREATE TABLE ',$db,' (' ;

# this is the constant (standard) part of the SQL insert statement
$stdinsrt = 'INSERT INTO ' . $db . ' (' ;

for ( $col = 0 ; $col < @n ; $col++)
{
	if ( $col != 0 )
	{
		print ", " ;
		$stdinsrt = $stdinsrt . ',' ;
	}
	print $n[$col],' ',$f[$col] ; 	# define the SQL column structure
	$stdinsrt = $stdinsrt . '"' . $n[$col] . '"' ;
	(@c1[$col],@c2[$col]) = split("-",@c[$col]) ;	# isolate start and stop byte number
	@c1[$col]-- ;			# index starts one lower at 0 in perl
	@cd[$col] = $c2[$col] - $c1[$col] ;	# width of the byte columns
}
$stdinsrt .= ') VALUES(' ;

print ", PRIMARY KEY (",@n[$pkey],",",@n[$skey],",",@n[$tkey],") ) ;\n" ;

while ( $line = <>)
{
	# debugging ... print $line ;
	print $stdinsrt ;
	for( $col = 0 ; $col < @n ; $col++)
	{
		$val = substr($line,@c1[$col],@cd[$col]) ;	# extract the value from this particular line
		if ( $col != 0 )
		{
			print "," ;
		}
		if ( $f[$col] =~ m/char/ )
		{
			print '\'' . $val . '\'' ;
		}
		else
		{
			printf "%g",$val ;
		}
	}
	print ");\n" ;
}

