#!/usr/bin/perl -w
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
if 0; # to go to perl if running under some shell
=head1 NAME
dta2sql - command-line tool to convert Stata 8 and Stata 10 .dta files to a SQL dump
=head1 SYNOPSIS
dta2sql I<file.dta> [I<otherfile.dta> ...]
The result will be in I<file.sql> (or I<file-1.sql>, I<file-2.sql>, etc; no overwriting of existing files).
=head1 BUGS
Minimal documentation, error management and testing.
All types of missing values in the .dta file are turned into NULLs.
Probably some other problems.
=head1 NO WARRANTY
This code comes with ABSOLUTELY NO WARRANTY of any kind.
=head1 AUTHOR
Written by Franck Latremoliere.
Copyright (c) 2007, 2008 Reckon LLP.
L<http://www.reckon.co.uk/staff/franck/>
=head1 LICENCE
This program is free software; you can use, redistribute and/or modify it under the same terms as Perl itself
(Artistic Licence or GNU GPL).
=head1 SEE ALSO
L<Parse::Stata::DtaReader> does the parsing.
=cut
use warnings;
use strict;
use Parse::Stata::DtaReader;
unless (@ARGV) {
print STDERR "dta2sql: no input files\n";
print STDERR "Usage: dta2sql file.dta [otherfile.dta ...]\n";
print STDERR "Copyright (c) 2007, 2008 Reckon LLP";
print STDERR
" (Parse::Stata::DtaReader version $Parse::Stata::DtaReader::VERSION)\n";
exit 1;
}
sub dta2sql {
my $status = 0;
for my $file (@_) {
unless ( -e $file ) {
warn "$file: file not found";
++$status;
next;
}
open DTA, '<', $file;
my $dta = new Parse::Stata::DtaReader(*DTA);
if ( $dta->{ds_format} == 114 || $dta->{ds_format} == 113 ) {
print STDERR "$file: ";
print STDERR $dta->{ds_format} == 114 ? 'Stata 10' : 'Stata 8';
print STDERR $dta->{byteorder} == 1
? ' big endian'
: ' little endian';
print STDERR ", $dta->{nvar} variable";
print STDERR 's' if $dta->{nvar} > 1;
print STDERR ", $dta->{nobs} observation";
print STDERR 's' if $dta->{nobs} > 1;
print STDERR "\n";
( my $name = $file ) =~ s/(\.dta)?$/.sql/;
my $no = '';
$name =~ s/-?$no\.sql$/'-' . ++$no . '.sql'/e while -e $name;
open SQL, '>', $name;
$name =~ s/\..*//;
$name =~ s/[^a-z0-9_]+/_/gi;
$name = 'dta' unless $file;
select SQL;
print "CREATE TABLE `$name` (\n";
for ( my $i = 0 ; $i < $dta->{nvar} ; ++$i ) {
local $_ = $dta->{varlist}[$i];
s/[^a-z0-9_]+/_/gi;
$_ = "var$i" unless $_;
print " `$_` ", $dta->sqlType($i), ",\n";
}
print
" `_n` int not null default 0,\n PRIMARY KEY (`_n`)\n);\n";
my $n = 0;
while ( my @a = $dta->readRow ) {
print "INSERT INTO `$name` VALUES (" . join(
',',
map {
if ( defined $_ ) { s/"/\\"/g; qq%"$_"%; }
else { 'NULL'; }
} ( @a, ++$n )
)
. ");\n";
}
close SQL;
}
else {
warn "$file: not a Stata 8 or 10 .dta file";
++$status;
}
close DTA;
}
return $status;
}
exit dta2sql(@ARGV);