The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package t::utilBabel;
use t::util;
use Carp;
use Test::More;
use Test::Deep;
use List::MoreUtils qw(uniq);
use Exporter();
our @ISA=qw(Exporter);

our @EXPORT=
  (@t::util::EXPORT,
   qw(check_object_basics sort_objects
      prep_tabledata load_maptable load_master load_ur select_ur cleanup_ur
      cmp_objects cmp_objects_quietly cmp_table cmp_table_quietly
      check_handcrafted_idtypes check_handcrafted_masters check_handcrafted_maptables
      check_handcrafted_name2idtype check_handcrafted_name2master check_handcrafted_name2maptable
      check_handcrafted_id2object check_handcrafted_id2name
      load_handcrafted_maptables load_handcrafted_masters
    ));

sub check_object_basics {
  my($object,$class,$name,$label)=@_;
  report_fail($object,"$label connected object defined") or return 0;
  $object->name;		# touch object in case still Oid
  report_fail(UNIVERSAL::isa($object,$class),"$label: class") or return 0;
  report_fail($object->name eq $name,"$label: name") or return 0;
  return 1;
}
sub check_objects_basics {
  my($objects,$class,$names,$label)=@_;
  my @objects=sort_objects($objects,$label);
  for my $i (0..$#$objects) {
    my $object=$objects->[$i];
    check_object_basics($objects->[$i],$class,$names->[$i],"$label object $i") or return 0;
  }
  return 1;
}
# sort by name.
sub sort_objects {
  my($objects,$label)=@_;
  # hmm.. this doesn't work for Oids. not important anyway, so just bag it
  # TODO: revisit when AutoDB provides public method for fetching Oids.
#   # make sure all objects have names
#   for my $i (0..$#$objects) {
#     my $object=$objects->[$i];
#     report_fail(UNIVERSAL::can($object,'name'),"$label object $i: has name method") 
#       or return ();
#   }
  my @sorted_objects=sort {$a->name cmp $b->name} @$objects;
  wantarray? @sorted_objects: \@sorted_objects;
}
# scrunch whitespace
sub scrunch {
  my($x)=@_;
  $x=~s/\s+/ /g;
  $x=~s/^\s+|\s+$//g;
  $x;
}
sub scrunched_eq {scrunch($_[0]) eq scrunch($_[1]);}

########################################
# these functions deal w/ relational tables

# prepare table data
# data can be 
#   string: one line per row; each row is whitespace-separated values
#   list or ARRAY of strings: each string is row
#   list or ARRAY of ARRAYs: each sub-ARRAY is row
sub prep_tabledata {
  my @rows=(@_==1 && !ref $_[0])? split(/\n+/,$_[0]): flatten(@_);
  # clean whitespace and split rows 
  @rows=map {ref($_)? $_: do {s/^\s+|\s+$//g; s/\s+/ /g; [split(' ',$_)]}} @rows;
  # convert NULLS into undefs
  for my $row (@rows) {
    map {$_=undef if 'NULL' eq uc($_)} @$row;
  }
  \@rows;
}
sub load_maptable {
  my($babel,$maptable)=splice(@_,0,2);
  my $data=prep_tabledata(@_);
  ref $maptable or $maptable=$babel->name2maptable($maptable);

  # code adapted from ConnectDots::LoadMapTable Step
  my $tablename=$maptable->tablename;
  my @idtypes=@{$maptable->idtypes};
  my @column_names=map {$_->name} @idtypes;
  my @column_sql_types=map {$_->sql_type} @idtypes;
  my @column_defs=map {$column_names[$_].' '.$column_sql_types[$_]} (0..$#idtypes);
  my @indexes=@column_names;

  # code adapted from MainData::LoadData Step
  my $dbh=$babel->autodb->dbh;
  $dbh->do(qq(DROP TABLE IF EXISTS $tablename));
  my $columns=join(', ',@column_defs);
  $dbh->do(qq(CREATE TABLE $tablename ($columns)));

  # new code: insert data into table
  my @values=map {'('.join(', ',map {$dbh->quote($_)} @$_).')'} @$data;
  my $values=join(",\n",@values);
  $dbh->do(qq(INSERT INTO $tablename VALUES\n$values));

  # code adapted from MainData::LoadData Step
  # put parens around single columns
  my @alters=map {"($_)"} @indexes; # put parens around single columns
  my $alters=join(', ',map {"ADD INDEX $_"} @alters);
  $dbh->do(qq(ALTER TABLE $tablename $alters));
}
sub load_master {
  my($babel,$master)=splice(@_,0,2);
  my $data=prep_tabledata(@_);
  ref $master or $master=$babel->name2master($master);

  # code adapted from ConnectDots::LoadMaster, ConnectDots::LoadImpMaster, MainData::LoadData
  my $tablename=$master->tablename;
  my $idtype=$master->idtype;
  my $column_name=$idtype->name;
  my $column_sql_type=$idtype->sql_type;
  my $column_def="$column_name $column_sql_type";
  my $query=$master->query;

  my $dbh=$babel->autodb->dbh;
  if ($master->view) {
    $dbh->do(qq(DROP VIEW IF EXISTS $tablename));
    $dbh->do(qq(CREATE VIEW $tablename AS\n$query));
    return;
  }
  $dbh->do(qq(DROP TABLE IF EXISTS $tablename));
  my $sql=qq(CREATE TABLE $tablename ($column_def));
  $sql.=" AS\n$query" if $master->implicit; # if implicit, load data via query
  $dbh->do($sql);
  if (!$master->implicit) {
    # new code: insert data into table
    my @values=map {'('.join(', ',map {$dbh->quote($_)} @$_).')'} @$data;
    my $values=join(",\n",@values);
    $dbh->do(qq(INSERT INTO $tablename VALUES\n$values));
  }
  # code adapted from MainData::LoadData Step
  $dbh->do(qq(ALTER TABLE $tablename ADD INDEX ($column_name)));
}
# create universal relation (UR)
# algorithm: natual full outer join of all maptables and explicit masters
#            any pre-order traversal of schema graph will work (I think!)
# >>> assume that lexical order of maptables gives a valid pre-order <<<
# sadly, since MyQSL still lacks full outer joins, have to emulate with left/right
# joins plus union. do it step-by-step: I couldn't figure out how to do it in
# one SQL statement...
sub load_ur {
  my($babel,$urname)=@_;
  $urname or $urname='ur';
  # ASSUME that lexical order of maptables gives a valid pre-order
  my @tables=sort {$a->tablename cmp $b->tablename} @{$babel->maptables};
  # add in explicit Masters. order doesn't matter so long as they're last
  push(@tables,grep {$_->explicit} @{$babel->masters});

  my $left=shift @tables;
  while (my $right=shift @tables) {
    my $result_name=@tables? undef: $urname; # final answer is 'ur'
    $left=full_join($babel,$left,$right,$result_name);
  }
  $left;
}
# NG 11-01-21: added 'translate all'
# select data from ur (will actually work for any table)
sub select_ur {
  my $args=new Hash::AutoHash::Args(@_);
  my($babel,$urname,$input_idtype,$input_ids,$input_ids_all,$output_idtypes)=
    @$args{qw(babel urname input_idtype input_ids input_ids_all output_idtypes)};
  confess "Only one of inputs_ids or input_ids_all may be set" if $input_ids && $input_ids_all;
  $urname or $urname=$args->tablename || 'ur'; 
  my($input_idtype,@output_idtypes)=map {ref $_? $_->name: $_} ($input_idtype,@$output_idtypes);

  my $dbh=$babel->autodb->dbh;
  # NG 10-08-25: removed 'uniq' since duplicate columns are supposed to be kept
  # my @columns=uniq grep {length($_)} ($input_idtype,@output_idtypes);
  my @columns=grep {length($_)} ($input_idtype,@output_idtypes);
  my $columns=join(', ',@columns);
  my $sql=qq(SELECT DISTINCT $columns FROM $urname);
  if ($input_ids && @$input_ids) {
    my $in='('.join(', ',map {$dbh->quote($_)} @$input_ids).')'; 
    $sql.=qq( WHERE $input_idtype IN $in);
  }
  my $result=$dbh->selectall_arrayref($sql);
  # NG 10-11-10: remove NULL rows, because translate now skips these
  if (@output_idtypes) {
    my @result;
    for my $row (@$result) {
      my @output_cols=$input_idtype? @$row[1..$#$row]: @$row;
      push(@result,$row) if grep {defined $_} @output_cols;
    }
    $result=\@result;
  }
  # NG 11-01-21: if input_ids_all set, exclude rows where input_idtype is NULL
  #   (the check for $input_idtype is for consistency with loop above. I don't know
  #    whether it's possible for input_ids_all to be set w/o input_idtype being set)
  if ($input_idtype && $input_ids_all) {
    my @result=grep {defined $_->[0]} @$result;
    $result=\@result;
  }
  $result;
}
# cmp ARRAYs of Babel component objects (anything with an 'id' method will work)
# like cmp_bag but 
# 1) reports errors the way we want them
# 2) sorts the args to avoid Test::Deep's 'bag' which is ridiculously slow...
sub cmp_objects {
  my($actual,$correct,$label,$file,$line,$limit)=@_;
  my $ok=cmp_objects_quietly($actual,$correct,$label,$file,$line,$limit);
  report_pass($ok,$label);
}
sub cmp_objects_quietly {
  my($actual,$correct,$label,$file,$line)=@_;
  my @actual_sorted=sort {$a->id cmp $b->id} @$actual;
  my @correct_sorted=sort  {$a->id cmp $b->id} @$correct;
  cmp_quietly(\@actual_sorted,\@correct_sorted,$label,$file,$line);
}
# like cmp_bag but 
# 1) reports errors the way we want them
# 2) sorts the args to avoid Test::Deep's 'bag' which is ridiculously slow...
# NG 10-11-08: extend to test limit. CAUTION: limit should be small or TOO SLOW!
sub cmp_table {
  my($actual,$correct,$label,$file,$line,$limit)=@_;
  my $ok=cmp_table_quietly($actual,$correct,$label,$file,$line,$limit);
  report_pass($ok,$label);
}
sub cmp_table_quietly {
  my($actual,$correct,$label,$file,$line,$limit)=@_;
  unless (defined $limit) {
    my @actual_sorted=sort cmp_rows @$actual;
    my @correct_sorted=sort cmp_rows @$correct;
    # my $ok=cmp_quietly($actual,bag(@$correct),$label,$file,$line);
    return cmp_quietly(\@actual_sorted,\@correct_sorted,$label,$file,$line);
  } else {
    report_fail(@$actual<=$limit,"$label: expected $limit row(s), got ".scalar @$actual,
		$file,$line)
      or return 0;
    return cmp_quietly($actual,subbagof(@$correct),$label,$file,$line);
  }
  $ok;
}

# sort subroutine: $a, $b are ARRAYs of strings. should be same lengths. cmp element by element
sub cmp_rows {
  my $ret;
  for (0..$#$a) {
    return $ret if $ret=$a->[$_] cmp $b->[$_];
  }
  # equal up to here. if $b has more, then $a is smaller
  $#$a <=> $#$b;
}
# emulate natural full outer join. return result table
# $result is optional name of result table. if not set, unique name generated
# TODO: add option to delete intermediate tables as we go.
sub full_join {
  my($babel,$left,$right,$result_name)=@_;
  my @idtypes=uniq(@{$left->idtypes},@{$right->idtypes});
  my $result=new t::FullOuterJoinTable(name=>$result_name,idtypes=>\@idtypes);
  my $leftname=$left->tablename;
  my $rightname=$right->tablename;
  my $resultname=$result->tablename;
  my @column_names=map {$_->name} @idtypes;
  my @column_sql_types=map {$_->sql_type} @idtypes;
  my @column_defs=map {$column_names[$_].' '.$column_sql_types[$_]} (0..$#idtypes);
  my $column_names=join(', ',@column_names);
  my $column_defs=join(', ',@column_defs);
  
  # code adapted from MainData::LoadData Step
  my $dbh=$babel->autodb->dbh;
  $dbh->do(qq(DROP TABLE IF EXISTS $resultname));
  my $columns=join(', ',@column_defs);
  my $query=qq
    (SELECT $column_names FROM $leftname NATURAL LEFT OUTER JOIN $rightname
     UNION
     SELECT $column_names FROM $leftname NATURAL RIGHT OUTER JOIN $rightname);
  $dbh->do(qq(CREATE TABLE $resultname ($columns) AS\n$query));
  $result;
}
# arg is babel. clean up intermediate tables created en route to ur
sub cleanup_ur {t::FullOuterJoinTable->cleanup(@_) }

########################################
# these functions test our hand-crafted Babel & components

sub check_handcrafted_idtypes {
  my($actual,$mature,$label)=@_;
  $label or $label='idtypes'.($mature? ' (mature)': '');
  my $num=4;
  my $class='Data::Babel::IdType';
  report_fail(@$actual==$num,"$label: number of elements") or return 0;
  my @actual=sort_objects($actual,$label) or return 0;
  for my $i (0..$#actual) {
    my $actual=$actual[$i];
    my $suffix='00'.($i+1);
    report_fail(UNIVERSAL::isa($actual,$class),"$label object $i: class") or return 0;
    report_fail($actual->name eq "type_$suffix","$label object $i: name") or return 0;
    report_fail($actual->id eq "idtype:type_$suffix","$label object $i: id") or return 0;
    report_fail($actual->display_name eq "display_name_$suffix","$label object $i: display_name") or return 0;
    report_fail($actual->referent eq "referent_$suffix","$label object $i: referent") or return 0;
    report_fail($actual->defdb eq "defdb_$suffix","$label object $i: defdb") or return 0;
    report_fail($actual->meta eq "meta_$suffix","$label object $i: meta") or return 0;
    report_fail($actual->format eq "format_$suffix","$label object $i: format") or return 0;
    report_fail($actual->sql_type eq "VARCHAR(255)","$label object $i: sql_type") or return 0;
    if ($mature) {
      check_object_basics($actual->babel,'Data::Babel','test',"$label object $i babel");
      check_object_basics($actual->master,'Data::Babel::Master',
			  "type_${suffix}_master","$label object $i master");
    }
  }
  pass($label);
}

# masters 2&3 are implicit, hence some of their content is special
# NG 10-11-10: implicit Masters now have clauses to exclude NULLs in their queries
sub check_handcrafted_masters {
  my($actual,$mature,$label)=@_;
  $label or $label='masters'.($mature? ' (mature)': '');
  my $num=$mature? 4: 2;
  my $class='Data::Babel::Master';
  report_fail(@$actual==$num,"$label: number of elements") or return 0;
  my @actual=sort_objects($actual,$label) or return 0;
  for my $i (0..$#actual) {
    my $actual=$actual[$i];
    my $suffix='00'.($i+1);
    my $name="type_${suffix}_master";
    my $id="master:$name";
    # masters 2&3 are implicit, hence some of their content is special
    my($inputs,$namespace,$query,$view,$implicit);
    if ($i<2) {
      $inputs="MainData/table_$suffix";
      $namespace="ConnectDots";
      $namespace="ConnectDots";
      $query="SELECT col_$suffix AS type_$suffix FROM table_$suffix";
      $view=0;
      $implicit=0;
    } else {
      $namespace='';		# namespace not in input config file, but hopefully set in output
      $implicit=1;
      if ($i==2) {
	$inputs="ConnectDots/maptable_003 ConnectDots/maptable_002";
	# NG 10-11-10: added clause to exclude NULLs
# 	$query=<<QUERY
# 	SELECT type_003 FROM maptable_003
# 	UNION
# 	SELECT type_003 FROM maptable_002
# QUERY
	$query=<<QUERY
	SELECT type_003 FROM maptable_003 WHERE type_003 IS NOT NULL
	UNION
	SELECT type_003 FROM maptable_002 WHERE type_003 IS NOT NULL
QUERY
  ;
	$view=0;
      } elsif ($i==3) {
	$inputs="ConnectDots/maptable_003";
	# NG 10-11-10: added clause to exclude NULLs
	# $query="SELECT DISTINCT type_004 FROM maptable_003";
	$query="SELECT DISTINCT type_004 FROM maptable_003 WHERE type_004 IS NOT NULL";
	$view=1;      
      }}

    report_fail(UNIVERSAL::isa($actual,$class),"$label object $i: class") or return 0;
    report_fail($actual->name eq $name,"$label object $i: name") or return 0;
    report_fail($actual->id eq $id,"$label object $i: id") or return 0;
    report_fail(scrunched_eq($actual->inputs,$inputs),"$label object $i: inputs") or return 0;
    report_fail(scrunched_eq($actual->namespace,$namespace),"$label object $i: namespace") or return 0;
    report_fail(scrunched_eq($actual->query,$query),"$label object $i: query") or return 0;
    report_fail(as_bool($actual->view)==$view,"$label object $i: view") or return 0;
    report_fail(as_bool($actual->implicit)==$implicit,"$label object $i: implicit") or return 0;
    if ($mature) {
      check_object_basics($actual->babel,'Data::Babel','test',"$label object $i babel");
      check_object_basics($actual->idtype,'Data::Babel::IdType',
			  "type_$suffix","$label object $i idtype");
    }
  }
  pass($label);
}

sub check_handcrafted_maptables {
  my($actual,$mature,$label)=@_;
  $label or $label='maptables'.($mature? ' (mature)': '');
  my $num=3;
  my $class='Data::Babel::MapTable';
  report_fail(@$actual==$num,"$label: number of elements") or return 0;
  my @actual=sort_objects($actual,$label) or return 0;
  for my $i (0..$#actual) {
    my $actual=$actual[$i];
    my $suffix='00'.($i+1);
    my $suffix1='00'.($i+2);
    my $name="maptable_$suffix";
    my $id="maptable:$name";
    my $inputs="MainData/table_$suffix";
    my $query=<<QUERY
SELECT col_$suffix AS type_$suffix, col_$suffix1 AS type_$suffix1
FROM   table_$suffix
QUERY
      ;
    report_fail(UNIVERSAL::isa($actual,$class),"$label object $i: class") or return 0;
    report_fail($actual->name eq $name,"$label object $i: name") or return 0;
    report_fail($actual->id eq $id,"$label object $i: id") or return 0;
    report_fail(scrunched_eq($actual->inputs,$inputs),"$label object $i: inputs") or return 0;
    report_fail(scrunched_eq($actual->namespace,"ConnectDots"),"$label object $i: namespace") or return 0;
    report_fail(scrunched_eq($actual->query,$query),"$label object $i: query") or return 0;
     if ($mature) {
      check_object_basics($actual->babel,'Data::Babel','test',"$label object $i babel");
      check_objects_basics($actual->idtypes,'Data::Babel::IdType',
			  ["type_$suffix","type_$suffix1"],"$label object $i idtypes");
    }
  }
  pass($label);
}

sub check_handcrafted_name2idtype {
  my($babel)=@_;
  my $label='name2idtype';
  my %name2idtype=map {$_->name=>$_} @{$babel->idtypes};
  for my $name (qw(type_001 type_002 type_003 type_004)) {
    my $actual=$babel->name2idtype($name);
    report_fail($actual==$name2idtype{$name},"$label: object $name") or return 0;
  }
  pass($label);
}
sub check_handcrafted_name2master {
  my($babel)=@_;
  my $label='name2master';
  my %name2master=map {$_->name=>$_} @{$babel->masters};
  for my $name (qw(type_001 type_002 type_003 type_004)) {
    my $actual=$babel->name2master($name);
    report_fail($actual==$name2master{$name},"$label: object $name") or return 0;
  }
  pass($label);
}
sub check_handcrafted_name2maptable {
  my($babel)=@_;
  my $label='name2maptable';
  my %name2maptable=map {$_->name=>$_} @{$babel->maptables};
  for my $name (qw(type_001 type_002 type_003 type_004)) {
    my $actual=$babel->name2maptable($name);
    report_fail($actual==$name2maptable{$name},"$label: object $name") or return 0;
  }
  pass($label);
}
sub check_handcrafted_id2object {
  my($babel)=@_;
  my $label='id2object';
  my @objects=(@{$babel->idtypes},@{$babel->masters},@{$babel->maptables});
  my %id2object=map {$_->id=>$_} @objects;
  my @ids=
    (qw(idtype:type_001 idtype:type_002 idtype:type_003 idtype:type_004),
     qw(master:type_001_master master:type_002_master master:type_003_master master:type_004_master),
     qw(maptable:maptable_001 maptable:maptable_002 maptable:maptable_003));
  for my $id (@ids) {
    my $actual=$babel->id2object($id);
    report_fail($actual==$id2object{$id},"$label: object $id") or return 0;
  }
  pass($label);
}
sub check_handcrafted_id2name {
  my($babel)=@_;
  my $label='id2name';
  my @ids=
    (qw(idtype:type_001 idtype:type_002 idtype:type_003 idtype:type_004),
     qw(master:type_001_master master:type_002_master master:type_003_master master:type_004_master),
     qw(maptable:maptable_001 maptable:maptable_002 maptable:maptable_003));
  my @names=
    (qw(type_001 type_002 type_003 type_004),
     qw(type_001_master type_002_master type_003_master type_004_master),
     qw(maptable_001 maptable_002 maptable_003));
  my %id2name=map {$ids[$_]=>$names[$_]} (0..$#ids);
  for my $id (@ids) {
    my $actual=$babel->id2name($id);
    report_fail($actual eq $id2name{$id},"$label: object $name") or return 0;
  }
  pass($label);
}

sub load_handcrafted_maptables {
  my($babel,$data)=@_;
  for my $name (qw(maptable_001 maptable_002 maptable_003)) {
    load_maptable($babel,$name,$data->$name->data);
  }
}
sub load_handcrafted_masters {
  my($babel,$data)=@_;
  # explicit masters
  for my $name (qw(type_001_master type_002_master)) {
    load_master($babel,$name,$data->$name->data);
  }
  # implicit masters have no data
  for my $name (qw(type_003_master type_004_master)) {
    load_master($babel,$name);
  }
}
1;

package t::FullOuterJoinTable;
# simple class to represent intermediate tables used to emulate full outer joins
use strict;
use Carp;
use Class::AutoClass;
use vars qw(@AUTO_ATTRIBUTES @OTHER_ATTRIBUTES @CLASS_ATTRIBUTES %SYNONYMS %DEFAULTS);
use base qw(Class::AutoClass);

@AUTO_ATTRIBUTES=qw(name idtypes);
@OTHER_ATTRIBUTES=qw(seqnum);
@CLASS_ATTRIBUTES=qw();
%SYNONYMS=(tablename=>'name',columns=>'idtypes');
%DEFAULTS=(idtypes=>[]);
Class::AutoClass::declare;

our $seqnum=0;
sub seqnum {shift; @_? $seqnum=$_[0]: $seqnum}

sub _init_self {
  my($self,$class,$args)=@_;
  return unless $class eq __PACKAGE__; # to prevent subclasses from re-running this
  my $name=$self->name || $self->name('fulljoin_'.sprintf('%03d',++$seqnum));
}
# drop all tables that look like our intermediates
sub cleanup {
  my($class,$babel)=@_;
  my $dbh=$babel->autodb->dbh;
  my @tables=@{$dbh->selectcol_arrayref(qq(SHOW TABLES LIKE 'fulljoin_%'))};
  # being a bit paranoid, make sure each table ends with 3 digits
  @tables=grep /\d\d\d$/,@tables;
  map {$dbh->do(qq(DROP TABLE IF EXISTS $_))} @tables;
}
1;