#!/usr/local/ActivePerl-5.8/bin/perl -w ################################################################################ # package Data::Generate # Description: returns an SQL-Data generator object # Design: during parsing we create following data structure internally: # 'value_term': ascii string # 'value_column': array of possible alternative choices for the value term # 'value_chain': a chain of value columns # 'chain_list': the generator itself # output data : output data is retrieved by subsequent concatenation # of value terms in a value chain. If more than one value chains are defined, # then, based on weigthing, each chain at turn will be "asked" to return an # output value, until an array of the requested cardinality is filled. # ################################################################################ package Data::Generate; use 5.006; use strict; use warnings; use Carp; use Parse::RecDescent; use Date::Parse; use Date::DayOfWeek; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); require Exporter; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use Data::Generate ':all'; our %EXPORT_TAGS = ( 'all' => [ qw( parse ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw(); our $VERSION = '0.01'; $Data::Generate::Parser=undef; $Data::Generate::current=undef; $Data::Generate::ACTUAL_VALUE_COLUMN=undef; $Data::Generate::VC_RANGE_REVERSE_FLAG=undef; #------------------------------------------------------------------------------- # Various constant definitions #------------------------------------------------------------------------------- $Data::Generate::vcol_type ={}; $Data::Generate::vcol_type->{year}->{lowlimit}=1970; # Unix 32 bit date $Data::Generate::vcol_type->{year}->{highlimit}=2037; # Unix 32 bit date $Data::Generate::vcol_type->{year}->{type}='year'; $Data::Generate::vcol_type->{month}->{lowlimit}=1; $Data::Generate::vcol_type->{month}->{highlimit}=12; $Data::Generate::vcol_type->{month}->{type}='month'; $Data::Generate::vcol_type->{day}->{lowlimit}=1; $Data::Generate::vcol_type->{day}->{highlimit}=31; $Data::Generate::vcol_type->{day}->{type}='day'; $Data::Generate::vcol_type->{hour}->{lowlimit}=0; $Data::Generate::vcol_type->{hour}->{highlimit}=24; $Data::Generate::vcol_type->{hour}->{type}='hour'; $Data::Generate::vcol_type->{minute}->{lowlimit}=0; $Data::Generate::vcol_type->{minute}->{highlimit}=59; $Data::Generate::vcol_type->{minute}->{type}='minute'; $Data::Generate::vcol_type->{second}->{lowlimit}=0; $Data::Generate::vcol_type->{second}->{highlimit}=59; $Data::Generate::vcol_type->{second}->{type}='second'; $Data::Generate::vcol_type->{fraction}->{type}='fraction'; $Data::Generate::vchain_type ={}; $Data::Generate::vchain_type->{DATE}->{type}='DATE'; $Data::Generate::vchain_type->{DATE}->{vcol_output_format}= ['%s',' %02d:','%02d:','%02d','.%s']; # ['%04d','%02d','%02d',' %02d:','%02d:','%02d','.%s']; $Data::Generate::vchain_type->{DATE}->{check_type}=sub { no warnings "all"; my $input=shift; (my $ss,my $mm, my $hh,my $day,my $month,my $year)= strptime($input); return undef unless defined $year; $year+=1900; $month++; my $precision=0; $precision = $Data::Generate::current->{ct_precision} if defined $Data::Generate::current->{ct_precision}; my $result=sprintf('%04d%02d%02d %02d:%02d:%02.'.$precision.'f', $year, $month, $day,$hh,$mm,$ss); return undef unless defined str2time($result); return $result; }; $Data::Generate::vchain_type->{DATE}->{output_format_fct}=sub { my $input=shift; return $input unless defined $Data::Generate::current->{ct_precision}; my $precision=$Data::Generate::current->{ct_precision}; my ( $date_string, $date_fraction) = ($input =~ /^(.+?)(\d{2}\.\d*)$/); $date_fraction=sprintf('%02.'.$precision.'f',$date_fraction); return $date_string.$date_fraction; }; $Data::Generate::vchain_type->{DATE}->{subtype}->{'DATEWITHFRACTION'} ->{fraction_start_ix}=4; $Data::Generate::vchain_type->{INTEGER}->{type}='INTEGER'; $Data::Generate::vchain_type->{INTEGER}->{check_type}=sub { no warnings "all"; my $input=shift; my $result=int($input); return undef unless $result == $input; return $result; }; $Data::Generate::vchain_type->{FLOAT}->{output_format_fct}=sub { my $input=shift; $input =~ s/^\-0+\.0+$/0.0/; $input =~ s/^\+//; return eval($input); }; $Data::Generate::vchain_type->{FLOAT}->{check_type}=sub { # no warnings "all"; my $input=shift; my $result=$input*1.0; $input=eval($input); $result=eval($result); return undef unless $result == $input; return $result; }; $Data::Generate::vcol_type->{weekday}->{type}='weekday'; $Data::Generate::vcol_type->{weekday}->{term_list}=[qw{SUN MON TUE WED THU FRI SAT}]; ################################################################################ # sub new # Description: # inital constructor for a list of value chains. # ################################################################################ sub new { my ($class,$text) = @_; my $self = {}; $self->{vchain_text} = $text; $self->{vchain_length} = 0; $self->{data_array} = ['']; $self->{vchain_array} = []; $self->{vchain_hash} = {}; $self->{actual_vcol} = {}; bless $self, $class; $self->reset_actual_vchain(); return $self; } ################################################################################ # sub load_parser # Description: # create a Parse::RecDescent parser # and load Data::Generate grammatics into. # ################################################################################ sub load_parser { #------------------------------------------------------------------------------# # START OF GRAMMATICS # #------------------------------------------------------------------------------# my $grammar = q { start: varchar_type | string_type | date_type | integer_type | float_type #------------------------------------------------------------------------------# # STRING TYPE GRAMMATICS # #------------------------------------------------------------------------------# # different intialization, but for the rest see varchar type string_type: ct_string vch_list ct_string: /STRING/ { $Data::Generate::current->{chain_type}='STRING'; } #------------------------------------------------------------------------------# # VARCHAR TYPE GRAMMATICS # #------------------------------------------------------------------------------# varchar_type: ct_varchar vch_list ct_varchar: /(VC2|VC|VARCHAR2|VARCHAR)/ '(' /\d+/ ')' { $Data::Generate::current->{chain_type}='VARCHAR'; $Data::Generate::current->{ct_length}=$item[3]; } vch_list: value_chain: value_col(s) vchain_weigth(?) { $Data::Generate::current->bind_actual_vchain(); 1; } vchain_weigth: /\(/ /\d+\.?\d*/ /\%\)/ { $Data::Generate::current->{actual_vchain}->{weigth}=$item[2]; 1; } value_col: vcr_integer vcol_card(?) { $Data::Generate::current->bind_actual_vcol(); 1; } | vcol_range | vcol_literal | vcol_filelist vcol_literal: vcol_lit_term vcol_card(?) { $Data::Generate::current->bind_actual_vcol(); 1; } vcol_card: '{' /\d+/ '}' { $Data::Generate::current->{actual_vcol}->{quantifier}=$item[2]; 1; } vcol_lit_term: /\'.+?\'/ { $item[1] =~ /\'(.+?)\'/; push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},$1); 1; } vcol_range: vcr_start vcr_reverse(?) vcr_term(s) vcr_end vcol_card(?) { $Data::Generate::current->check_reverse_flag(); $Data::Generate::current->bind_actual_vcol(); 1;} vcr_start: /\[/ vcr_reverse: /\^/ { $Data::Generate::current->{actual_vcol} ->{reverse_flag}=1; } vcr_term: /[^\s\]\[]/ '-' /[^\s\]\[]/ { my @cmp = map(chr, ( ord($item[1])..ord($item[3]) ) ); push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},@cmp); } | '\\\\ ' { push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},' '); } | '\\\\' /./ { push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},$item[2]); } | /[^\]\[]/ { push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},$item[1]); } vcr_end: /\]/ vcr_integer: /\[/ /\d+/ '..' /\d+/ /\]/ { warn "false integer order " if $item[4] < $item[2]; my @cmp = ($item[2]..$item[4]); push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},@cmp); } vcol_filelist: vcol_filelist_term vcol_card(?) { $Data::Generate::current->bind_actual_vcol(); 1; } vcol_filelist_term: /\<\S+\>/ { (my $file)= ($item[1] =~ /\<(\S+)\>/); $Data::Generate::current->vcol_file_process($file); 1; } #------------------------------------------------------------------------------# # INTEGER TYPE GRAMMATICS # #------------------------------------------------------------------------------# integer_type: ct_integer vch_int_list ct_integer: /(INTEGER|INT)/ ct_int_length(?) { $Data::Generate::current->{chain_type}='INTEGER'; $Data::Generate::current->{ct_length}=9 # max integer value unless (exists $Data::Generate::current->{ct_length}); if ($Data::Generate::current->{ct_length}>9) { warn " maximal integer length is 9 \n". "Current Value: $Data::Generate::current->{ct_length} is too high" .",will use length 9."; $Data::Generate::current->{ct_length}=9; } } ct_int_length: '(' /\d+/ ')' { $Data::Generate::current->{ct_length}=$item[2]; } vch_int_list: vch_int: vchi_sign(?) vcol_int(s) vchain_weigth(?) { $Data::Generate::current->bind_actual_vchain(); 1; } vchi_sign: /\+\/\-/ { $Data::Generate::current->{actual_vchain}->{sign}->{'+'}++; $Data::Generate::current->{actual_vchain}->{sign}->{'-'}++; 1; } | /[+-]/ { $Data::Generate::current->{actual_vchain}->{sign}->{$item[1]}++; 1; } vcol_int: vcint_range | vcint_literal | vcol_filelist vcint_range: /\[/ /\]/ vcint_card(?) { $Data::Generate::current->bind_actual_vcol(); 1;} vcint_term: /\d+/ '-' /\d+/ { my @cmp = (($item[1]+0)..($item[3]+0)); push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},@cmp); } | vcint_lit_term vcint_literal: vcint_lit_term vcint_card(?) { $Data::Generate::current->bind_actual_vcol(); 1; } vcint_lit_term: /\d+/ { push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},($item[1]+0)); } vcint_card: '{' /\d+/ '}' { $Data::Generate::current->{actual_vcol}->{quantifier}=$item[2]; 1; } #------------------------------------------------------------------------------# # FLOAT TYPE GRAMMATICS # #------------------------------------------------------------------------------# float_type: ct_float vch_float_list ct_float: /FLOAT/ '(' /\d+/ ')' { $Data::Generate::current->{chain_type}='FLOAT'; $Data::Generate::current->{ct_length}=$item[3]; } vch_float_list: vch_float: vchfloat_filelist | vcol_float_int_part vcol_float_fraction vcol_float_exponent(?) vchain_weigth(?) { $Data::Generate::current->{actual_vchain} ->{chain_subtype}='FLOATTOTAL'; $Data::Generate::current->bind_actual_vchain(); 1; } vchfloat_filelist: /\<\S+\>/ { $Data::Generate::current->{actual_vchain} ->{chain_subtype}='FLOATLIST'; (my $file)= ($item[1] =~ /\<(\S+)\>/); $Data::Generate::current->vcol_file_process($file); $Data::Generate::current->bind_actual_vcol(); $Data::Generate::current->bind_actual_vchain(); 1; } vcol_float_int_part: vchi_sign(?) vcol_int(s) { $Data::Generate::current->{actual_vchain} ->{chain_subtype}='FLOATINTPART'; $Data::Generate::current->bind_actual_vchain(); 1; } vcol_float_exponent: 'E' vcfloat_exp_sign(?) vcfloat_exp_term { $Data::Generate::current->{actual_vchain} ->{chain_subtype}='FLOATEXP'; $Data::Generate::current->bind_actual_vchain(); 1; } vcfloat_exp_sign: /[+-]/ { $Data::Generate::current->{actual_vchain}->{sign}->{$item[1]}++; 1; } vcfloat_exp_term: vcfloatexp_lit_term { $Data::Generate::current->bind_actual_vcol(); 1; } vcfloatexp_lit_term: /\d+/ { push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},($item[1]+0)); 1; } vcol_float_fraction: '.' vcol_fraction { $Data::Generate::current->{actual_vchain} ->{chain_subtype}='FLOATFRACTION'; $Data::Generate::current->bind_actual_vchain(); 1; } #------------------------------------------------------------------------------# # DATE TYPE GRAMMATICS # #------------------------------------------------------------------------------# date_type: ct_date ct_date_precision(?) vch_date_list ct_date: /(DT|DATE)/ { $Data::Generate::current->{chain_type}='DATE'; $Data::Generate::current->{ct_length}=17; } ct_date_precision: '(' /\d+/ ')' { $Data::Generate::current->{ct_precision}=$item[2]; if ($Data::Generate::current->{ct_precision}>14) { warn " maximal precision for fraction of seconds is 14 \n". "Current Value: $Data::Generate::current->{ct_precision} is too high" .",will use precision 14."; $Data::Generate::current->{ct_precision}=14; } $Data::Generate::current->{ct_length}+= $Data::Generate::current->{ct_precision}+1; # +1 because of dot sign } vch_date_list: vch_date: vcol_year vcol_month vcol_day vcol_time(?) vchain_weigth(?) { $Data::Generate::current->bind_actual_vchain(); 1; } | vchdate_filelist vchdate_filelist: /\<\S+\>/ { (my $file)= ($item[1] =~ /\<(\S+)\>/); $Data::Generate::current->vcol_file_process($file); $Data::Generate::current->bind_actual_vcol(); $Data::Generate::current->bind_actual_vchain(); 1; } vcol_time: vcol_hour ':' vcol_min ':' vcol_sec vcol_date_fraction(?) vcol_year: vcdate_range { $Data::Generate::current->bind_vcol_range('year'); 1;} | vcdate_literal { $Data::Generate::current->bind_vcol_literal('year'); 1;} vcol_month: vcmonth_range { $Data::Generate::current->bind_vcol_range('month'); 1;} | vcmonth_literal { my $litval=shift(@{$Data::Generate::current->{actual_vcol}->{month_literal_values}}); $Data::Generate::current->{actual_vcol}->{literal_value}=$litval; $Data::Generate::current->bind_vcol_literal('month'); 1;} vcol_day: vcday_range { $Data::Generate::current->bind_vcol_range('day'); 1;} | vcdate_literal { $Data::Generate::current->bind_vcol_literal('day'); 1;} vcol_hour: vcdate_range { $Data::Generate::current->bind_vcol_range('hour'); 1;} | vcdate_literal { $Data::Generate::current->bind_vcol_literal('hour'); 1;} vcol_min: vcdate_range { $Data::Generate::current->bind_vcol_range('minute'); 1;} | vcdate_literal { $Data::Generate::current->bind_vcol_literal('minute'); 1;} vcol_sec: vcdate_range { $Data::Generate::current->bind_vcol_range('sec'); 1;} | vcdate_literal { $Data::Generate::current->bind_vcol_literal('sec'); 1;} vcol_date_fraction: '.' vcol_fraction { $Data::Generate::current->{actual_vchain} ->{chain_subtype}='DATEWITHFRACTION'; 1; } vcdate_literal: /\d+/ { $Data::Generate::current->{actual_vcol}->{literal_value}=$item[1]; 1; } vcdate_range: /\[/ /\]/ vcdate_term: /\d+/ '-' /\d+/ { $Data::Generate::current->add_term_range($item[1],$item[3]);1; } | /\d+/ { push(@{$Data::Generate::current->{actual_vcol}->{value_term_list}}, $item[1]); 1; } vcday_range: /\[/ /\]/ vcday_term: vcdate_term | { my $low =shift(@{$Data::Generate::current->{actual_vcol}->{weekday_index_values}}); my $high =shift(@{$Data::Generate::current->{actual_vcol}->{weekday_index_values}}); push(@{$Data::Generate::current->{actual_vcol}->{weekday_term_list}}, $low) unless defined $high; $Data::Generate::current-> add_weekday_term_range($low,$high) if defined $high; 1; } vcday_literal: /[a-zA-Z]+/ { my @week=@{$Data::Generate::vcol_type->{weekday}->{term_list}}; my $ix=-1; foreach my $wday_ix (0..$#week) { $ix=$wday_ix if $item[1] =~ /^$week[$wday_ix]/i; } die "cant process day term $item[1] " if $ix==-1; push(@{$Data::Generate::current->{actual_vcol}->{weekday_index_values}} ,$ix); 1; } vcmonth_range: /\[/ /\]/ vcmonth_term: { my $low =shift(@{$Data::Generate::current->{actual_vcol}->{month_literal_values}}); my $high =shift(@{$Data::Generate::current->{actual_vcol}->{month_literal_values}}); push(@{$Data::Generate::current->{actual_vcol}->{value_term_list}}, $low) unless defined $high; $Data::Generate::current->add_term_range($low,$high) if defined $high; 1; } vcmonth_literal: /(\d+|[a-zA-Z]+)/ { my $month=undef; if ($item[1] =~ /\d+/) { $month =$item[1]; } else { (undef,undef,undef,undef,$month,undef,undef) = Date::Parse::strptime($item[1].' 01'); die "Month $item[2] invalid " unless defined $month; ++$month; } push(@{$Data::Generate::current->{actual_vcol}->{month_literal_values}} ,$month); 1; } #------------------------------------------------------------------------------# # FRACTION SUBTYPE GRAMMATICS # # (RELEVANT FOR DATE AND FLOAT) # #------------------------------------------------------------------------------# vcol_fraction: vcol_fract(s) vcol_fract: vcfract_range | vcfract_literal vcfract_range: /\[/ /\]/ vcfract_card(?) { $Data::Generate::current->bind_actual_vcol(); 1;} vcfract_term: /\d+/ '-' /\d+/ { my @cmp = (($item[1]+0)..($item[3]+0)); push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},@cmp); } | vcfract_lit_term vcfract_literal: vcfract_lit_term vcfract_card(?) { $Data::Generate::current->bind_actual_vcol(); 1; } vcfract_lit_term: /\d+/ { push(@{$Data::Generate::current-> {actual_vcol}->{value_term_list}},($item[1]+0)); } vcfract_card: '{' /\d+/ '}' { $Data::Generate::current->{actual_vcol}->{quantifier}=$item[2]; 1; } }; #------------------------------------------------------------------------------# # END OF GRAMMATICS # #------------------------------------------------------------------------------# my $parser = Parse::RecDescent->new($grammar); defined $parser or carp "couldn't load parser"; return $parser; } ################################################################################ # Description: helper function ################################################################################ sub check_reverse_flag { my $self =shift; return unless exists $self->{actual_vcol}->{reverse_flag}; $self->{actual_vcol}->{value_term_list}= $self->get_value_column_reverse($self->{actual_vcol}->{value_term_list}); delete $self->{actual_vcol}->{reverse_flag}; } ################################################################################ # Description: helper function ################################################################################ sub check_range_order ($$) { my $min =shift; my $max =shift; if ($min >$max ) { carp "false range order, $min > $max". " will invert limits"; return [$max, $min]; } return [$min, $max]; } ################################################################################ # sub vcol_file_process # Description: read vcol_terms from file # ################################################################################ sub vcol_file_process { my $self =shift; my $file =shift; open(VCOLFILE,$file) or carp "Couldnt open term file $file "; my @cmp = (); close(VCOLFILE); @cmp=('') if $#cmp==-1; map(chomp($_),@cmp); if (exists $Data::Generate::vchain_type->{$self->{chain_type}} && exists $Data::Generate::vchain_type->{$self->{chain_type}}->{check_type} ) { my @cmp2=(); foreach my $element (@cmp) { my $result= &{$Data::Generate::vchain_type->{$self->{chain_type}}->{check_type}} ($element); push(@cmp2,$result) if defined $result; } @cmp=@cmp2; }; my $uniq={}; map($uniq->{$_}++,@cmp); @cmp=(keys %$uniq); push(@{$self->{actual_vcol}->{value_term_list}},@cmp); } ################################################################################ # sub vcol_date_process # Description: processing action for dates. # At the end of each date production the three vcol date types year month day # will be merged to a single one, so that date validity can be assessed, # therefore instead of normally adding the date columns year and month, # we keep them aside until the day column is processed. # ################################################################################ sub vcol_date_process { my $self =shift; if ($self->{actual_vcol}->{type} =~ /^(month|year)$/ ) { my $type=$self->{actual_vcol}->{type}; $type.='_vcol'; $self->{$type} = $self->{actual_vcol}; return; } die "internal eror" if ($self->{actual_vcol}->{type} ne 'day' ); $self->{day_vcol} = $self->{actual_vcol}; $self->{actual_vcol}={}; my @value_term_list=(); my $weekdays={}; if (exists $self->{day_vcol}->{weekday_term_list}) { foreach my $day_term (@{$self->{day_vcol}->{weekday_term_list}}) { $weekdays->{$day_term}++ } } foreach my $year_term (@{$self->{year_vcol}->{value_term_list}}) { foreach my $month_term (@{$self->{month_vcol}->{value_term_list}}) { my $monthdays={}; foreach my $day_term (@{$self->{day_vcol}->{value_term_list}}) { # convert 'char dates in numeric ones like '07'-> 7 # otherwise we cannot make unique value set $day_term+=0; $monthdays->{$day_term}++ } my $first_month_weekday=dayofweek( 01,$month_term, $year_term ); foreach my $wkday_term (keys %{$weekdays}) { my $day_term=$wkday_term-$first_month_weekday+1; $day_term+=7 if $day_term<1; while ($day_term<31) { $monthdays->{$day_term}++; $day_term+=7; } } foreach my $day_term (keys %{$monthdays}) { my $date_term = sprintf('%04d%02d%02d',$year_term, $month_term, $day_term); push(@value_term_list,$date_term) if defined str2time($date_term); } } } @value_term_list=sort(@value_term_list); $self->{actual_vcol}->{value_term_list}=\@value_term_list; $self->add_value_column($self->{actual_vcol}->{value_term_list}); delete $self->{year_vcol}; delete $self->{month_vcol}; delete $self->{day_vcol}; } ################################################################################ # sub vchain_date_fraction_process # Description: reorganizes the internal vchain structure of date types with # fraction values due to the possible presence of trailing zeros. ################################################################################ sub vchain_date_fraction_process { my $self =shift; my $vchain_full=$self->{actual_vchain}; $self->reset_actual_vchain(); my $vchain_fraction={}; $vchain_fraction->{vcol_count}=$vchain_full->{vcol_count}; map($vchain_fraction->{vcol_hash}->{$_}->{value_column}= $vchain_full->{vcol_hash}->{$_}->{value_column}, (0..$vchain_fraction->{vcol_count})); my $fraction_start= $Data::Generate::vchain_type->{DATE}->{subtype}->{'DATEWITHFRACTION'} ->{fraction_start_ix}; map_vchain_indexes($vchain_fraction, sub { return undef if $_[0] <$fraction_start; return $vchain_fraction->{vcol_count}-$_[0]; } ); $vchain_fraction->{vcol_count}=$vchain_full->{vcol_count}- $fraction_start; my $vchain_data={}; $vchain_data->{weigth}=$vchain_full->{weigth}; my $vchain_weigth_list=$self->vchain_number_reprocess($vchain_fraction); foreach my $vchain (@$vchain_weigth_list) { $vchain->{vcol_count}+=$fraction_start; map_vchain_indexes($vchain, sub { return $vchain->{vcol_count}-$_[0]; } ); map($vchain->{vcol_hash}->{$_}->{value_column}= $vchain_full->{vcol_hash}->{$_}->{value_column}, (0..$fraction_start-1)); } # weigth has to be recalculated now. calculate_vchain_list_weigth($vchain_weigth_list,$vchain_data->{weigth}); 1; } ################################################################################ # sub vchain_fraction_process # Description: reorganizes the internal vchain structure of a fractional # vchain part due to the possible presence of trailing zeros. ################################################################################ sub vchain_fraction_process { my $self =shift; my $vchain_fraction =$self->{actual_vchain}; $self->reset_actual_vchain(); map_vchain_indexes($vchain_fraction, sub { return $vchain_fraction->{vcol_count}-$_[0]; } ); my $vchain_data={}; $vchain_data->{weigth}=$vchain_fraction->{weigth}; my $vchain_weigth_list=$self->vchain_number_reprocess($vchain_fraction); foreach my $vchain (@$vchain_weigth_list) { map_vchain_indexes($vchain, sub { return $vchain->{vcol_count}-$_[0]; } ); } return $vchain_weigth_list; 1; } ################################################################################ # sub merge_vchain_float_lists # Description: merge int and float vchain lists together.(and add a '.' inbet.) ################################################################################ sub merge_vchain_float_lists { my $self =shift; my $vchain_sign_list =shift; my $vchain_integer_list =shift; my $vchain_float_list =shift; my $vchain_exp_list =shift; my $vchain_merge_list =[]; my $vchain_zero =undef; foreach my $vchain_integer (@$vchain_integer_list) { map_vchain_indexes($vchain_integer, sub { return 1+$_[0] ;}); $vchain_integer->{vcol_hash}->{0}->{value_column}=$vchain_sign_list; $vchain_integer->{vcol_count}++; } if (@$vchain_exp_list ==0) { my $vchain_exp={}; $vchain_exp->{vcol_hash}->{0}->{value_column}=['0']; $vchain_exp->{vcol_count}++; push(@$vchain_exp_list,$vchain_exp); } foreach my $vchain_exp (@$vchain_exp_list) { map_vchain_indexes($vchain_exp, sub { return 1+$_[0] ;}); $vchain_exp->{vcol_hash}->{0}->{value_column}=['E']; $vchain_exp->{vcol_count}++; } my $vchain_exp = $vchain_exp_list->[0]; foreach my $vchain_integer (@$vchain_integer_list) { foreach my $vchain_float (@$vchain_float_list) { foreach my $vchain_exp (@$vchain_exp_list) { my $vchain_merged={}; $vchain_merged->{vcol_count}=$vchain_integer->{vcol_count}; map($vchain_merged->{vcol_hash}->{$_}->{value_column}= $vchain_integer->{vcol_hash}->{$_}->{value_column}, (0..$vchain_integer->{vcol_count})); $vchain_merged->{vcol_count}++; $vchain_merged->{vcol_hash}->{$vchain_merged->{vcol_count}}->{value_column}=['.']; map($vchain_merged->{vcol_hash}->{$vchain_merged->{vcol_count}+1+$_} ->{value_column}=$vchain_float->{vcol_hash}->{$_}->{value_column}, (0..$vchain_float->{vcol_count})); $vchain_merged->{vcol_count}+=$vchain_float->{vcol_count}+1; # avoid double +/-0.0 , skip exp processing if (($#{$vchain_merged->{vcol_hash}->{1}->{value_column}}==0) && ($vchain_merged->{vcol_hash}->{1}->{value_column}->[0]==0) && ($#{$vchain_merged->{vcol_hash}->{2}->{value_column}}==0) && ($vchain_merged->{vcol_hash}->{2}->{value_column}->[0] eq '.') && ($#{$vchain_merged->{vcol_hash}->{3}->{value_column}}==0) && ($vchain_merged->{vcol_hash}->{3}->{value_column}->[0]==0) && ($vchain_merged->{vcol_count}==3) ) { next if defined $vchain_zero; $vchain_merged->{vcol_hash}->{0}->{value_column}=['+']; $self->bind_vchain($vchain_merged); push(@$vchain_merge_list,$vchain_merged); $vchain_zero=$vchain_merged; next; } map($vchain_merged->{vcol_hash}->{$vchain_merged->{vcol_count}+1+$_} ->{value_column}=$vchain_exp->{vcol_hash}->{$_}->{value_column}, (0..$vchain_exp->{vcol_count})); $vchain_merged->{vcol_count}+=$vchain_exp->{vcol_count}+1; $self->bind_vchain($vchain_merged); push(@$vchain_merge_list,$vchain_merged); } } } return $vchain_merge_list; 1; } ################################################################################ # sub vchain_date_fraction_process # Description: reorganizes the internal vchain structure of date types with # fraction values due to the possible presence of trailing zeros. ################################################################################ sub vchain_float_process { my $self =shift; if ($self->{actual_vchain}->{chain_subtype} eq 'FLOATLIST' ) { $self->bind_vchain($self->{actual_vchain}); $self->reset_actual_vchain(); return; } if ($self->{actual_vchain}->{chain_subtype} eq 'FLOATINTPART' ) { $self->{FLOAT_CHAIN_START}=1+$#{$self->{vchain_array}}; $self->{FLOAT_CHAIN_SIGN}=[]; push (@{$self->{FLOAT_CHAIN_SIGN}},'+') if (! exists $self->{actual_vchain}->{sign} || exists $self->{actual_vchain}->{sign}->{'+'} ); push (@{$self->{FLOAT_CHAIN_SIGN}},'-') if ( exists $self->{actual_vchain}->{sign} && exists $self->{actual_vchain}->{sign}->{'-'} ); my $actual_vchain= $self->{actual_vchain}; $self->reset_actual_vchain(); $self->{FLOAT_INTEGER_PART}=$self->vchain_number_reprocess($actual_vchain); return; } if ($self->{actual_vchain}->{chain_subtype} eq 'FLOATFRACTION' ) { $self->{FLOAT_FRACTION_PART}=$self->vchain_fraction_process(); my $actual_vchain= $self->{actual_vchain}; $self->reset_actual_vchain(); return; } if ($self->{actual_vchain}->{chain_subtype} eq 'FLOATEXP' ) { $self->{FLOAT_EXP_PART}=$self->vchain_integer_process(); return; } croak "Error in float parsing $self->{actual_vchain}->{chain_subtype} " unless $self->{actual_vchain}->{chain_subtype} eq 'FLOATTOTAL'; # print "*********************".$self->{actual_vchain}->{weigth}."\n"; $self->{FLOAT_CHAIN_WEIGTH}=$self->{actual_vchain}->{weigth}; unless (exists $self->{FLOAT_EXP_PART}) { $self->{actual_vchain}->{chain_subtype}= 'FLOATEXP'; push(@{$self->{actual_vcol}->{value_term_list}},0); $self->bind_actual_vcol(); $self->{FLOAT_EXP_PART}=$self->vchain_integer_process(); $self->{zzzzFLOAT_EXP_PART}=$self->{FLOAT_EXP_PART}; } foreach my $vchain_id ($self->{FLOAT_CHAIN_START}..$#{$self->{vchain_array}}) { delete $self->{vchain_hash}->{$vchain_id}; pop(@{$self->{vchain_array}}); } my $merge_list=$self->merge_vchain_float_lists($self->{FLOAT_CHAIN_SIGN}, $self->{FLOAT_INTEGER_PART}, $self->{FLOAT_FRACTION_PART}, $self->{FLOAT_EXP_PART}); calculate_vchain_list_weigth($merge_list,$self->{FLOAT_CHAIN_WEIGTH}); delete $self->{FLOAT_CHAIN_START}; delete $self->{FLOAT_CHAIN_SIGN}; delete $self->{FLOAT_CHAIN_WEIGTH}; delete $self->{FLOAT_INTEGER_PART}; delete $self->{FLOAT_FRACTION_PART}; delete $self->{FLOAT_EXP_PART}; 1; } ################################################################################ # sub vchain_integer_process # Description: reorganizes the internal vchain structure of integer types. # due to the possible presence of leading zeros. ################################################################################ # INT (9) +/- [3,0] [21,3,0] [4,0] # # + 0 0 4 -> converted to + 0 | + 3 0 4 | + 21 4| + 4 # - 3 21 0 | - 21 0 | - 3 0| - # 3 | 3 | | # | | | # # degr of freedom = 1 + 12 + 8 + 2 = 23 # -210','-214','-30','-300','-304','-3210','-3214','-330','-334','-34','-4','0','210','214','30','300', # '304','3210','3214 sub vchain_integer_process { my $self =shift; my $last_vchain=$self->{actual_vchain}; $self->reset_actual_vchain(); my $vchain_data={}; $vchain_data->{weigth}=$last_vchain->{weigth}; push (@{$vchain_data->{sign}},'+') if (! exists $last_vchain->{sign} || exists $last_vchain->{sign}->{'+'} ); push (@{$vchain_data->{sign}},'-') if ( exists $last_vchain->{sign} && exists $last_vchain->{sign}->{'-'} ); delete $last_vchain->{sign}; my $vchain_weigth_list=$self->vchain_number_reprocess($last_vchain); foreach my $vchain (@$vchain_weigth_list) { next if $vchain->{vcol_count}==0 && @{$vchain->{vcol_hash}->{0}->{value_column}}==1 && $vchain->{vcol_hash}->{0}->{value_column}->[0]==0; map_vchain_indexes($vchain,sub { return 1+$_[0];}); $vchain->{vcol_count}++; @{$vchain->{vcol_hash}->{0}->{value_column}}=@{$vchain_data->{sign}}; } # weigth has to be recalculated now. calculate_vchain_list_weigth($vchain_weigth_list,$vchain_data->{weigth}); return $vchain_weigth_list; } ################################################################################ # sub vchain_number_reprocess # Description: reorganizes the internal vchain structure of numeric types. # Due to the possible presence of leading or trailing zeros, we have to # restructure the vcols in vchains to avoid duplicates (001, 01 problem). # Other solutions are either too memory intensive (build the output values at # vchain binding) or lead to incorrect cardinality calculation (eliminate # duplicates at output data production); ################################################################################ # INT (9) +/- [3,0] [21,3,0] [4,0] # # + 0 0 4 -> converted to + 0 | + 3 0 4 | + 21 4| + 4 # - 3 21 0 | - 21 0 | - 3 0| - # 3 | 3 | | # | | | # # degr of freedom = 1 + 12 + 8 + 2 = 23 # -210','-214','-30','-300','-304','-3210','-3214','-330','-334','-34','-4','0','210','214','30','300', # '304','3210','3214','330','334','34','4 sub vchain_number_reprocess { my $self =shift; my $last_vchain =shift; my $vcol_nonzero_list=[]; my $vcol_zero_list=[]; my $vchain_weigth_list=[]; while($last_vchain->{vcol_count}>=0) { my $vcol_list= $last_vchain->{vcol_hash}->{0}->{value_column}; $vcol_nonzero_list=[]; $vcol_zero_list=[]; foreach my $vcol_value (@$vcol_list) { push (@$vcol_nonzero_list,$vcol_value) unless $vcol_value =~ /^0+$/; push (@$vcol_zero_list,$vcol_value) if $vcol_value =~ /^0+$/; } if(@$vcol_nonzero_list >0) { $last_vchain->{vcol_hash}->{0}->{value_column} =$vcol_nonzero_list; $self->bind_vchain($last_vchain); push(@$vchain_weigth_list,$self->{vchain_hash} ->{$#{$self->{vchain_array}}}); } last unless(@$vcol_zero_list>0); my $next_vchain={}; $next_vchain->{vcol_count}=$last_vchain->{vcol_count}; map($next_vchain->{vcol_hash}->{$_}->{value_column}= $last_vchain->{vcol_hash}->{$_}->{value_column}, (0..$last_vchain->{vcol_count})); map_vchain_indexes($next_vchain,sub { return undef if $_[0]==0; return $_[0]-1; }); $next_vchain->{vcol_count}--; $last_vchain=$next_vchain; } if (@$vcol_zero_list>0) { # add now 0 chain in place of +/- $last_vchain->{vcol_hash}->{0}->{value_column}=[0]; $last_vchain->{vcol_count}++; $self->bind_vchain($last_vchain); push(@$vchain_weigth_list,$self->{vchain_hash} ->{$#{$self->{vchain_array}}}); } return $vchain_weigth_list; } ################################################################################ # Description: helper function. Calculate weigth for a group of vchains ################################################################################ sub calculate_vchain_list_weigth { my $vchain_list =shift; my $weigth =shift; my $card= calculate_vchain_list_degrees_of_freedom($vchain_list); map($_->{weigth}=$weigth,@$vchain_list); map($_->{weigth}*=$_->{vchain_card},@$vchain_list); map($_->{weigth}/=$card,@$vchain_list); } ################################################################################ # Description: helper function.Change internal vcol indexes of a vchain ################################################################################ sub map_vchain_indexes { my $vchain =shift; my $change_function =shift; foreach my $index (0..$vchain->{vcol_count}) { my $new_index=&$change_function($index); next unless defined $new_index; $vchain->{vcol_hash_tmp}->{$new_index}->{value_column}= $vchain->{vcol_hash}->{$index}->{value_column}; } $vchain->{vcol_hash}=$vchain->{vcol_hash_tmp}; delete $vchain->{vcol_hash_tmp}; } ################################################################################ # Description: helper function ################################################################################ sub check_input_limits { my $type =shift; my $value =shift; # no type defined, no ranges to check return unless defined $type; return unless exists $Data::Generate::vcol_type->{$type}; my $limit_check_hash=$Data::Generate::vcol_type->{$type}; if ((exists $limit_check_hash->{lowlimit}) && (defined $limit_check_hash->{lowlimit})) { croak " $limit_check_hash->{type} went out of range,". " $value < $limit_check_hash->{lowlimit} " if $value < $limit_check_hash->{lowlimit}; } if ((exists $limit_check_hash->{highlimit}) && (defined $limit_check_hash->{highlimit})) { croak " $limit_check_hash->{type} went out of range,". " $value > $limit_check_hash->{highlimit} " if $value > $limit_check_hash->{highlimit}; } } ################################################################################ # sub # vcol_add_term_range # Description: # add an expression (a..b) after parsing ################################################################################ sub add_weekday_term_range { my $self =shift; my $min =shift; my $max =shift; my $act_vcol=$self->{actual_vcol}; if ($min>$max) { # index 6 is sunday push(@{$self->{actual_vcol}->{weekday_term_list}},($min..6)); # index 0 is monday push(@{$self->{actual_vcol}->{weekday_term_list}},(0..$max)); return; } push(@{$self->{actual_vcol}->{weekday_term_list}},($min..$max)); } ################################################################################ # sub # vcol_add_term_range # Description: # add an expression (a..b) after parsing ################################################################################ sub add_term_range { my $self =shift; my $min =shift; my $max =shift; my $minmax=check_range_order($min,$max); my $act_vcol=$self->{actual_vcol}; push(@{$self->{actual_vcol}->{value_term_list}}, ($minmax->[0]..$minmax->[1])); } ################################################################################ # sub # add_value_column_range # Description: # add an expression (a..b) after parsing ################################################################################ sub bind_vcol_range { my $self =shift; my $type =shift; my $act_vcol=$self->{actual_vcol}; foreach my $value (@{$self->{actual_vcol}->{value_term_list}}) { check_input_limits($type,$value); } $act_vcol->{type}=$type; $self->bind_actual_vcol(); } ################################################################################ # sub # add_value_column_range # Description: # add an expression (a..b) after parsing ################################################################################ sub bind_vcol_literal { my $self =shift; my $type =shift; my $act_vcol=$self->{actual_vcol}; check_input_limits($type,$self->{actual_vcol}->{literal_value}); $self->{actual_vcol}->{value_term_list}= [$act_vcol->{literal_value}]; $act_vcol->{type}=$type; $self->bind_actual_vcol(); } ################################################################################ # sub # sub set_actual_vchain_weigth # Description: # add weigth to actual value chain ################################################################################ sub reset_actual_vchain { my $self =shift; $self->{actual_vchain} = {}; $self->{actual_vchain}->{vchain_length} = 0; $self->{actual_vchain}->{weigth}=100; } ################################################################################ # sub bind_actual_vcol # Description: Postprocessing action. # At the end of each value column production, add actual value column to the # actual vchain. Afterwards reset actual_vcol to an empty hash # ################################################################################ sub bind_actual_vcol { my $self =shift; my $quantifier=1; $quantifier=$self->{actual_vcol}->{quantifier} if exists $self->{actual_vcol}->{quantifier}; if ((defined $self->{actual_vcol}->{type} ) && ($self->{actual_vcol}->{type} =~ /^(day|month|year)$/ )) { $self->vcol_date_process(); } elsif ((defined $self->{actual_vcol}->{type} ) && ($self->{actual_vcol}->{type} eq 'sign' )) { $self->{sign_value_list}=$self->{actual_vcol}->{value_term_list}; $self->reset_actual_vcol(); } else { $self->add_value_column($self->{actual_vcol}->{value_term_list}) foreach(1..$quantifier); } $self->{actual_vcol} = {}; $self->{actual_vcol}->{type} = undef; } sub reset_actual_vcol { my $self =shift; $self->{actual_vcol} = {}; $self->{actual_vcol}->{type} = undef; } ################################################################################ # Description: helper function. add vchain to generator object ################################################################################ sub bind_vchain { my $self =shift; my $vchain =shift; push(@{$self->{vchain_array}},$vchain); $self->{vchain_hash} ->{$#{$self->{vchain_array}}}=$vchain; } ################################################################################ # sub bind_actual_vchain # Description: Postprocessing action. # At the end of each chain production, add actual value chain to the chain list # root structure, and afterwards reset actual_vchain to an empty hash # ################################################################################ sub bind_actual_vchain { my $self =shift; if ($self->{chain_type} eq 'INTEGER') { $self->vchain_integer_process(); return; } if ((exists $self->{actual_vchain}->{chain_subtype}) && ($self->{actual_vchain}->{chain_subtype} eq 'DATEWITHFRACTION')) { $self->vchain_date_fraction_process(); return; } if ($self->{chain_type} eq 'FLOAT') { $self->vchain_float_process(); return; } $self->bind_vchain($self->{actual_vchain}); $self->reset_actual_vchain(); } ################################################################################ # sub add_value_column # Description: # add array of terms. # ################################################################################ sub add_value_column { my $self = shift; my $tmp_value_column = shift; my $value_column = []; my $vcol_maxlength=0; my $ix=0; my $unique={}; foreach my $value_term (@{$tmp_value_column}) { my $vterm_length=length($value_term); if (exists $self->{ct_length} && defined $self->{ct_length} && $self->{actual_vchain}->{vchain_length}+ $vterm_length>$self->{ct_length}) { carp "Maximal length for type $self->{chain_type}($self->{ct_length}) " ."exceeded for \n$self->{vchain_text}\n" ."Element \'$value_term\' will be removed from output structures.\n" ."Please check your data creation rules\n"; next; } elsif ($unique->{$value_term}++>0) { carp "Duplicate entry \'$value_term\' found while building up internal structures.\n" ."Element \'$value_term\' will be removed from output structures.\n" ."Please check your data creation rules\n"; next; } else { push(@{$value_column},$value_term); $vcol_maxlength =($vterm_length>$vcol_maxlength?$vterm_length:$vcol_maxlength); $ix++; } }; $self->{actual_vchain}->{vchain_length}+=$vcol_maxlength; if ($#{$value_column}==-1) { return 1; } if (exists $self->{actual_vchain}->{vcol_count}) { $self->{actual_vchain}->{vcol_count}++ } else {$self->{actual_vchain}->{vcol_count}=0} $self->{actual_vchain}->{vcol_hash}->{$self->{actual_vchain}->{vcol_count}}->{value_column} = $value_column; } ################################################################################ # sub get_value_column_reverse # Description: fill array in place with complementary ascii chars # ################################################################################ sub get_value_column_reverse { my $self = shift; my $value_column = shift; my @complement = map(chr,(0..255)); my $hash={}; $hash->{$_}++ foreach (@{$value_column}); $value_column=[]; foreach (@complement) { push(@$value_column,$_) unless $hash->{$_}; } return $value_column; } ################################################################################ # sub get_occupation_ratio # Description: # Based on input cardinality and degrees of freedom calculate # the ratio of array elements to give / total number of elements # ################################################################################ sub set_occupation_ratio { my $self = shift; foreach my $actual_vchain (@{$self->{vchain_array}}) { my $occupation_ratio = 0; $occupation_ratio = log($actual_vchain->{data_card}/$actual_vchain->{vchain_card}) / ($actual_vchain->{vcol_count}+1); $occupation_ratio =exp($occupation_ratio); $actual_vchain->{vchain_occupation_ratio}= $occupation_ratio; } } ################################################################################ # sub calculate_occupation_levels # Description: # based on input cardinality calculate occupation levels. # ################################################################################ sub calculate_occupation_levels { my $self = shift; my $data_card = shift; $self->check_input_card($data_card); $self->set_occupation_ratio(); foreach my $actual_vchain (@{$self->{vchain_array}}) { my $vchain_occupation_ratio =$actual_vchain->{vchain_occupation_ratio}; foreach (values %{$actual_vchain->{vcol_hash}}) { my $vcol_degrees_of_freedom =$#{$_->{value_column}}+1; if ($vchain_occupation_ratio ==1) { $_->{occupation_level} = $vcol_degrees_of_freedom } else { $_->{occupation_level} = int($vchain_occupation_ratio*$vcol_degrees_of_freedom)+1; } } } return ; } ################################################################################ # sub get_degrees_of_freedom # Description: # get maximal cardinality # ################################################################################ sub get_degrees_of_freedom { my $self = shift; return $self->{card}; } ################################################################################ # sub get_degrees_of_freedom for a vchain # Description: # get maximal cardinality # ################################################################################ sub calculate_vchain_list_degrees_of_freedom { my $vchain_list = shift; my $card=0; foreach my $vchain_ref (@$vchain_list) { $vchain_ref->{vchain_card}=1; foreach my $vcol_ref (values %{$vchain_ref->{vcol_hash}}) { $vchain_ref->{vchain_card}*=$#{$vcol_ref->{value_column}}+1 } $card+=$vchain_ref->{vchain_card}; } return $card; } ################################################################################ # sub get_degrees_of_freedom # Description: # get maximal cardinality # ################################################################################ sub calculate_degrees_of_freedom { my $self = shift; $self->{card}= calculate_vchain_list_degrees_of_freedom($self->{vchain_array}); return $self->{card}; } ################################################################################ # sub get_degrees_of_freedom # Description: # get maximal cardinality # ################################################################################ sub calculate_weigth { my $self = shift; my $weigth=0.0; foreach my $vchain_ref (@{$self->{vchain_array}}) { $weigth+= $vchain_ref->{weigth}; } foreach my $vchain_ref (@{$self->{vchain_array}}) { $vchain_ref->{weigth}/=$weigth } } ################################################################################ # sub check_input_card # Description: # ensures that degrees of freedom >= input_card # generates a warning when input_card is bigger # ################################################################################ sub check_input_card { my $self = shift; my $data_card = shift; if ($data_card > $self->{card}) { carp "Input card ".$data_card." too big, maximal nr of ". "values is $self->{card}.\nReturn only ". $self->{card} ." values. \n"; $data_card=$self->{card}; } foreach my $vchain_ref (@{$self->{vchain_array}}) { $vchain_ref->{data_card}=$data_card; $vchain_ref->{data_card}*=$vchain_ref->{weigth}; # $vchain_ref->{data_card}=int($vchain_ref->{data_card}); # $vchain_ref->{data_card}=1 if $vchain_ref->{data_card}==0; if (int($vchain_ref->{data_card}) >$vchain_ref->{vchain_card}) { carp "Either input card ".$data_card." too big or vchain weigth ". "$vchain_ref->{weigth} too high.\nShould produce ". $vchain_ref->{data_card}." values, can't produce more than ". $vchain_ref->{vchain_card}." different values.\nReturn only ". $vchain_ref->{vchain_card} ." values. \n"; $vchain_ref->{data_card}=$vchain_ref->{vchain_card}; } } } ################################################################################ # sub fisher_yates_shuffle # Description: create a randomized array order. From Perl Cookbook # ################################################################################ # fisher_yates_shuffle( \@array ) : generate a random permutation # of @array in place sub fisher_yates_shuffle { my $array = shift; my $i; for ($i = @$array; --$i; ) { my $j = int rand ($i+1); next if $i == $j; @$array[$i,$j] = @$array[$j,$i]; } } ################################################################################ # sub is_valid # Description: check if generator structure was built up successfully # ################################################################################ sub is_valid { my $self = shift; return undef if @{$self->{vchain_array}} ==0; 1; } ################################################################################ # sub get_data # Description: # get data # ################################################################################ sub get_unique_data { my $self = shift; my $data_card =shift; $self->calculate_occupation_levels($data_card); my $data =[]; my $chain_type=$self->{chain_type}; foreach my $actual_vchain (@{$self->{vchain_array}}) { my $tmpdata =['']; foreach my $value_column_index (0..$actual_vchain->{vcol_count}) { my $value_column=$actual_vchain->{vcol_hash}->{$value_column_index}; my @tmp_value_column_copy=@{$value_column->{value_column}}; my @value_column_array =(); while(@value_column_array<$value_column->{occupation_level}) { my $rnd_index=int(rand(@tmp_value_column_copy)); push(@value_column_array,splice(@tmp_value_column_copy,$rnd_index,1)); } my $format=undef; $format= $Data::Generate::vchain_type->{$chain_type}->{vcol_output_format} ->[$value_column_index] if ((exists $Data::Generate::vchain_type->{$chain_type}) && (exists $Data::Generate::vchain_type ->{$chain_type}->{vcol_output_format})); $tmpdata=vcol_chain($tmpdata, \@value_column_array, $actual_vchain->{data_card},$format); } push(@$data,@$tmpdata); } # makes a random order fisher_yates_shuffle($data); # take away too much produced data shift(@$data) while(@$data>$data_card); map($_=&{$Data::Generate::vchain_type ->{$chain_type}->{output_format_fct}}($_),@$data) if ((exists $Data::Generate::vchain_type->{$chain_type}) && (exists $Data::Generate::vchain_type ->{$chain_type}->{output_format_fct})); @$data = map(int($_),@$data) if $chain_type eq 'INTEGER'; @$data = sort(@$data); my $uniq=[]; my $last=''; my $duplicates=0; foreach my $item (@$data) { if ($last eq $item) { $duplicates++; next; } push(@$uniq, $item); $last=$item; } carp "$duplicates duplicates found while generating ouput values.\n" ."Check syntax of statements" if $duplicates>0; return $uniq; } ################################################################################ # sub vcol_chain # Description: # make a cross product of two value columns and concatenate the values. # if type is with formatted output prepare values with a pipe inbetween. # ################################################################################ sub vcol_chain { my @original=@{shift()}; my @added =@{shift()}; my $card=shift; my $format=shift; $format= "%s" unless defined $format; my @composed =(); foreach my $ele (@added) { foreach my $e2 (@original) { push(@composed,$e2.sprintf($format,$ele)); next unless defined $card; return \@composed if(@composed>=$card); } } return \@composed; }; ################################################################################ # sub parse # Description: # parse given text. # ################################################################################ sub parse($) { my ($text) = @_; # check that parser is up and running $Data::Generate::Parser=load_parser() unless (defined $Data::Generate::Parser); # create a new generator and set it as global for parse routines $Data::Generate::ACTUAL_VALUE_COLUMN=undef; $Data::Generate::VC_RANGE_REVERSE_FLAG=undef; $Data::Generate::current= Data::Generate->new($text); $Data::Generate::Parser->start($text); $Data::Generate::current->is_valid() or croak "Error in parsing, invalid generator for $text"; $Data::Generate::current->calculate_weigth(); $Data::Generate::current->calculate_degrees_of_freedom(); return $Data::Generate::current; } 1; __END__