The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # to go to perl if running under some shell

=head1 NAME

dta2sql - command-line tool to convert Stata 8 and Stata 10 .dta files to a SQL dump

=head1 SYNOPSIS

dta2sql I<file.dta> [I<otherfile.dta> ...]

The result will be in I<file.sql> (or I<file-1.sql>, I<file-2.sql>, etc; no overwriting of existing files).

=head1 BUGS

Minimal documentation, error management and testing.
All types of missing values in the .dta file are turned into NULLs.
Probably some other problems.

=head1 NO WARRANTY

This code comes with ABSOLUTELY NO WARRANTY of any kind.

=head1 AUTHOR

Written by Franck Latremoliere.
Copyright (c) 2007, 2008 Reckon LLP.
L<http://www.reckon.co.uk/staff/franck/>

=head1 LICENCE

This program is free software; you can use, redistribute and/or modify it under the same terms as Perl itself
(Artistic Licence or GNU GPL).

=head1 SEE ALSO

L<Parse::Stata::DtaReader> does the parsing.

=cut

use warnings;
use strict;
use Parse::Stata::DtaReader;

unless (@ARGV) {
    print STDERR "dta2sql: no input files\n";
    print STDERR "Usage: dta2sql file.dta [otherfile.dta ...]\n";
    print STDERR "Copyright (c) 2007, 2008 Reckon LLP";
    print STDERR
      " (Parse::Stata::DtaReader version $Parse::Stata::DtaReader::VERSION)\n";
    exit 1;
}

sub dta2sql {
    my $status = 0;
    for my $file (@_) {
        unless ( -e $file ) {
            warn "$file: file not found";
            ++$status;
            next;
        }
        open DTA, '<', $file;
        my $dta = new Parse::Stata::DtaReader(*DTA);
        if ( $dta->{ds_format} == 114 || $dta->{ds_format} == 113 ) {
            print STDERR "$file: ";
            print STDERR $dta->{ds_format} == 114 ? 'Stata 10' : 'Stata 8';
            print STDERR $dta->{byteorder} == 1
              ? ' big endian'
              : ' little endian';
            print STDERR ", $dta->{nvar} variable";
            print STDERR 's' if $dta->{nvar} > 1;
            print STDERR ", $dta->{nobs} observation";
            print STDERR 's' if $dta->{nobs} > 1;
            print STDERR "\n";
            ( my $name = $file ) =~ s/(\.dta)?$/.sql/;
            my $no = '';
            $name =~ s/-?$no\.sql$/'-' . ++$no . '.sql'/e while -e $name;
            open SQL, '>', $name;
            $name =~ s/\..*//;
            $name =~ s/[^a-z0-9_]+/_/gi;
            $name = 'dta' unless $file;
            select SQL;
            print "CREATE TABLE `$name` (\n";

            for ( my $i = 0 ; $i < $dta->{nvar} ; ++$i ) {
                local $_ = $dta->{varlist}[$i];
                s/[^a-z0-9_]+/_/gi;
                $_ = "var$i" unless $_;
                print "    `$_` ", $dta->sqlType($i), ",\n";
            }
            print
"    `_n` int not null default 0,\n    PRIMARY KEY (`_n`)\n);\n";

            my $n = 0;
            while ( my @a = $dta->readRow ) {
                print "INSERT INTO `$name` VALUES (" . join(
                    ',',
                    map {
                        if ( defined $_ ) { s/"/\\"/g; qq%"$_"%; }
                        else { 'NULL'; }
                      } ( @a, ++$n )
                  )
                  . ");\n";
            }
            close SQL;
        }
        else {
            warn "$file: not a Stata 8 or 10 .dta file";
            ++$status;
        }
        close DTA;
    }
    return $status;
}

exit dta2sql(@ARGV);