package SQL::Interpolate; our $VERSION = '0.33'; use strict; use warnings; use Carp; use base 'Exporter'; our @EXPORT; our %EXPORT_TAGS = (all => [qw( make_sql_interp sql_interp sql_var sql sql_literal )]); # note: sql_literal depreciated our @EXPORT_OK = @{ $EXPORT_TAGS{all} }; # whether TRACE_SQL is enabled my $trace_sql_enabled = 0; # whether TRACE_FILTER is enabled my $trace_filter_enabled = 0; # whether macros are enabled my $macros_enabled = 0; # regexes my $id_match = qr/[a-zA-Z_\.]+/; my $table_name_match = $id_match; # next ID to use for table alias # [local to sql_interp functions] my $alias_id = 0; # current index in interpolation list # [local to sql_interp functions] my $idx = 0; # current interpolation list # [local to sql_interp functions] my $items_ref = undef; # whether typed sql_var() ever used (if so, # format of @bind result is more complicated) # [local to sql_interp functions] my $is_var_used = 0; # state item (SQL::Iterpolate or DBI handle) used in interpolation. # [local to sql_interp functions] my $state = undef; # bind elements in interpolation # [local to sql_interp functions] my @bind; sub import { my $class = shift; my @params = @_; # process any special "use" parameters my $filter_enabled = 0; # whether filtering enabled my $is_wrapped = 0; # whether module wrapped # (e.g. by DBIx::Interpolate) my %action_for = ( FILTER => sub { $filter_enabled = shift @params; }, TRACE_SQL => sub { $trace_sql_enabled = shift @params; print STDERR "TRACE_SQL enabled\n" if $trace_sql_enabled; }, TRACE_FILTER => sub { $trace_filter_enabled = shift @params; print STDERR "TRACE_FILTER enabled\n" if $trace_filter_enabled; }, __WRAP => sub { $is_wrapped = shift @params; } ); @_ = ($class); # unprocessed params while (my $item = shift @params) { my $action = $action_for{$item}; if ($action) { $action->(); } else { push @_, $item; } } # handle exports my $level = $is_wrapped ? 2 : 1; __PACKAGE__->export_to_level($level, @_); # handle source filtering (if enabled) if ($filter_enabled) { require SQL::Interpolate::Filter; goto &SQL::Interpolate::Filter::import; # @_ } return; } sub new { my $class = shift; # process special params. my $dbh; my $filters = []; while (ref $_[0] ne '') { if (UNIVERSAL::isa($_[0], 'DBI::db')) { $dbh = shift; } elsif (UNIVERSAL::isa($_[0], 'SQL::Interpolate::SQLFilter')) { push @$filters, shift; } } my %params = @_; # build indicies on $filters for quick access my $filters_hash = {}; # filter class name --> [filter] my $text_filters = []; # filter my $inits = []; # filter my $text_fragment_filters = []; # filter for my $filter (@$filters) { push @{$filters_hash->{ref $filter}}, $filter; push @$text_filters, $filter if $filter->can("filter_text"); push @$inits, $filter if $filter->can("init"); push @$text_fragment_filters, $filter if $filter->can("filter_text_fragment"); } # build object my $self = bless { dbh => $dbh, filters => $filters, filters_hash => $filters_hash, text_filters => $text_filters, inits => $inits, text_fragment_filters => $text_fragment_filters }, $class; return $self; } # note: sql_interp is not reentrant. sub sql_interp { my @items = @_; # clear call state $alias_id = 0; $idx = 0; $items_ref = undef; $is_var_used = 0; $state = undef; @bind = (); # extract state item (if any) my $interp; if (UNIVERSAL::isa($items[0], 'SQL::Interpolate')) { $state = $interp = $items[0]; } elsif (UNIVERSAL::isa($items[0], 'DBI::db')) { $state = $items[0]; } # process macros (if enabled) if ($macros_enabled) { if ($interp) { for my $initer (@{$interp->{inits}}) { $initer->init(); } } @items = SQL::Interpolate::Macro::sql_flatten(@items); } else { shift @items if $state; } $items_ref = \@items; # interpolate! my $sql = _sql_interp(@items); # convert bind values to complex format (if needed) if ($is_var_used) { for my $val (@bind) { my $valcopy = $val; ! ref $val and $val = [$val, sql_var(\$valcopy)]; } } # process text filters (if any) if ($interp) { for my $text_filter (@{$interp->{text_filters}}) { $sql = $text_filter->filter_text($sql); } } $trace_sql_enabled and print STDERR "DEBUG:interp[sql=$sql,bind=" . join(':', @bind) . "]\n"; return ($sql, @bind); } # helper called by sql_interp() # @items - interpolation list (no macros) sub _sql_interp { my (@items) = @_; my $sql = ''; foreach my $item (@items) { my $varobj; my $bind_size = @bind; if (ref $item eq 'SQL::Interpolate::Variable') { unless (keys %$item == 1 && defined($item->{value})) { $varobj = $item; $is_var_used = 1; } $item = $item->{value}; } if (ref $item eq 'SQL::Interpolate::SQL') { my ($sql2, @bind2) = _sql_interp(@$item); $sql .= ' ' if $sql ne ''; $sql .= $sql2; push @bind, @bind2; } elsif (ref $item) { if ($sql =~ /\bIN\s*$/si) { $item = [ $$item ] if ref $item eq 'SCALAR'; if (ref $item eq 'ARRAY') { if (@$item == 0) { $sql =~ s/$id_match\s+IN\s*$/1=0/si or croak 'ASSERT'; } else { $sql .= " (" . join(', ', map { _sql_interp_data($_); } @$item) . ")"; } } else { _error_item($idx, \@items); } } elsif ($sql =~ /\bSET\s*$/si && ref $item eq 'HASH') { _error('Hash has zero elements.') if keys %$item == 0; $sql .= " " . join(', ', map { my $key = $_; my $val = $item->{$key}; "$key=" . _sql_interp_data($val); } keys %$item); } elsif ($sql =~ /\bINSERT[\w\s]*\sINTO\s*$id_match\s*$/si) { $item = [ $$item ] if ref $item eq 'SCALAR'; if (ref $item eq 'ARRAY') { $sql .= " VALUES(" . join(', ', map { _sql_interp_data($_); } @$item) . ")"; } elsif (ref $item eq 'HASH') { $sql .= " (" . join(', ', keys %$item) . ")" . " VALUES(" . join(', ', map { _sql_interp_data($_); } values %$item) . ")"; } else { _error_item($idx, \@items); } } elsif ($sql =~ /(?:\bFROM|JOIN)\s*$/si) { # table reference # get alias for table my $table_alias = undef; # alias given to table my $next_item = $items[$idx + 1]; if(defined $next_item && ref $next_item eq '' && $next_item =~ /\s*AS\b/is) { $table_alias = undef; # provided by client } else { $table_alias = 'tbl' . $alias_id++; } $sql .= ' ' unless $sql eq ''; $sql .= _sql_interp_resultset($item); $sql .= " AS $table_alias" if defined $table_alias; } elsif (ref $item eq 'SCALAR') { push @bind, $$item; $sql .= ' ?'; } elsif (ref $item eq 'HASH') { # e.g. WHERE {x = 3, y = 4} if (keys %$item == 0) { $sql .= ' 1=1'; } else { my $s = join ' AND ', map { my $key = $_; my $val = $item->{$key}; if (! defined $val) { "$key IS NULL"; } elsif (ref $val eq 'ARRAY') { _sql_interp_list($key, $val); } else { "$key=" . _sql_interp_data($val); } } keys %$item; $s = "($s)" if keys %$item > 1; $s = " $s"; $sql .= $s; } } elsif (ref $item eq 'ARRAY') { # result set $sql .= ' ' unless $sql eq ''; $sql .= _sql_interp_resultset($item); } else { _error_item($idx, \@items); } } else { $sql .= ' ' unless $sql =~ /(^|\s)$/ || $item =~ /^\s/; # style $sql .= $item; } # attach $varobj to any bind values it generates if ($varobj) { my $num_pushed = @bind - $bind_size; for my $val (@bind[-$num_pushed..-1]) { $val = [$val, $varobj]; } } $idx++; } return $sql; } # sql_interp helper function. # Interpolate data element in aggregate variable (hashref or arrayref). # $ele - raw input element from aggregate. # returns $sql sub _sql_interp_data { my ($ele) = @_; if (ref $ele) { # e.g. sql() my ($sql2, @bind2) = _sql_interp($ele); push @bind, @bind2; $is_var_used = 1 if ref $bind2[0]; return $sql2; } else { push @bind, $ele; return '?'; } } # sql_interp helper function to interpolate "key IN list", # assuming context ("WHERE", {key => $list, ...}). sub _sql_interp_list { my ($key, $list) = @_; if (@$list == 0) { return "1=0"; } else { my @sqle; for my $ele (@$list) { my $sqle = _sql_interp_data($ele); push @sqle, $sqle; } my $sql2 = $key . " IN (" . join(', ', @sqle) . ")"; return $sql2; } } # sql_interp helper function to interpolate result set, # e.g. [[1,2],[3,4]] or [{a=>1,b=>2},{a=>3,b=>4}]. sub _sql_interp_resultset { my($item) = @_; my $sql = ''; if (ref $item eq 'ARRAY') { _error("table reference has zero rows") # improve? if @$item == 0; my $sql2 = ''; if(ref $item->[0] eq 'ARRAY') { _error("table reference has zero columns") # improve? if @{ $item->[0] } == 0; for my $row ( @$item ) { my $is_first_row = ($sql2 eq ''); $sql2 .= ' UNION ALL ' unless $is_first_row; $sql2 .= "SELECT " . join(', ', map { _sql_interp_data($_) } @$row); } } elsif(ref $item->[0] eq 'HASH') { _error("table reference has zero columns") # improve? if keys %{ $item->[0] } == 0; my $first_row = $item->[0]; for my $row ( @$item ) { my $is_first_row = ($sql2 eq ''); $sql2 .= ' UNION ALL ' unless $is_first_row; $sql2 .= "SELECT " . join(', ', map { my($key, $val) = ($_, $row->{$_}); my $sql3 = _sql_interp_data($val); $sql3 .= " AS $key" if $is_first_row; $sql3; } keys %$first_row); } } else { _error_item($idx, $items_ref); } $sql .= ' ' unless $sql eq ''; $sql .= "($sql2)"; } else { _error_item($idx, $items_ref); } return $sql; } sub sql { return SQL::Interpolate::SQL->new(@_); } sub make_sql_interp { my (@params) = @_; my $interp = sub { return sql_interp(@params, @_); }; return $interp; } sub sql_var { return SQL::Interpolate::Variable->new(@_); } # helper function to throw error sub _error_item { my ($idx, $items_ref) = @_; my $prev = $idx > 0 ? $items_ref->[$idx-1] : undef; my $prev_text = defined($prev) ? " following '$prev'" : ""; my $cur = $items_ref->[$idx]; _error("SQL::Interpolate error: Unrecognized " . "'$cur'$prev_text in interpolation list."); return; } sub _error { croak "SQL::Interpolate error: $_[0]"; } # This shall only be called by SQL::Interpolate::Macro. sub _enable_macros { scalar(caller()) eq 'SQL::Interpolate::Macro' or die 'ASSERT'; $macros_enabled = 1; # enable macros return; } # This shall only be called by DBIx::Interpolate. sub _use_params { scalar(caller()) eq 'DBIx::Interpolate' or die 'ASSERT'; # supported use parameters. return qw(FILTER TRACE_SQL TRACE_FILTER); } # depreciated sub sql_literal { print STDERR "SQL::Interpolate - WARNING: " . "sql_literal() is depreciated. use sql() instead.\n"; return sql(@_); } 1; package SQL::Interpolate::Variable; use strict; use Carp; sub new { my ($class, $value, %params) = @_; SQL::Interpolate::_error( "Value '$value' in sql_var constructor is not a reference") if ! ref $value; my $self = bless {value => $value, %params}, $class; return $self; } 1; package SQL::Interpolate::SQL; use strict; use Carp; use overload '.' => \&concat, '""' => \&stringify; sub new { my ($class, @list) = @_; my $self = \@list; bless $self, $class; return $self; } # Concatenate SQL object with another expression. # An SQL object can be concatenated with another SQL object, # variable reference, or an SQL string. # This is particularly useful to SQL::Interpolate::Filter. sub concat { my ($a, $b, $inverted) = @_; my @params = ( @$a, ref $b eq __PACKAGE__ ? @$b : $b ); @params = reverse @params if $inverted; my $o = SQL::Interpolate::SQL->new(@params); return $o; } sub stringify { my ($a) = @_; return $a; } 1; __END__ =head1 NAME SQL::Interpolate - Interpolate Perl variables into SQL statements =head1 SYNOPSIS use SQL::Interpolate qw(:all); # Some sample data to interpolate: my $s = 'blue'; my @v = (5, 6); # Variable references are transformed into bind parameters. # The most basic usage involves scalarrefs (as well as arrayrefs # preceeded by "IN"). my ($sql, @bind) = sql_interp 'SELECT * FROM table WHERE x = ', \$s, 'AND y IN', \@v; # RESULT: # $sql = 'SELECT * FROM mytable WHERE x = ? AND y IN (?, ?)' # @bind = ($s, @v); # In certain contexts, an arrayref or hashref acts as a single tuple: my ($sql, @bind) = sql_interp 'INSERT INTO table', {x => $s, y => 1}; # RESULT: # $sql = 'INSERT INTO mytable (x, y) VALUES(?, ?)'; # @bind = ($s, 1); my ($sql, @bind) = sql_interp 'UPDATE table SET', {x => $s, y => 1}, 'WHERE y <> ', \2; # RESULT: # $sql = 'UPDATE mytable SET x = ?, y = ? WHERE y <> ?'; # @bind = ($s, 1, 2); # In general, a hashref provides a shortcut for specifying # a logical-AND construction: my ($sql, @bind) = sql_interp 'SELECT * FROM table WHERE', {x => $s, y => \@v}; # RESULT: # $sql = 'SELECT * FROM mytable WHERE (x = ? AND y IN (?, ?))'; # @bind = ($s, @v); # In general, an arrayref acts as a result set or reference to # a temporary table: my ($sql, @bind) = sql_interp [[1, 2], [4, 5]], 'UNION', [{x => 2, y => 3}, {x => 5, y => 6}; # RESULT: # $sql = '(SELECT ?, ? UNION ALL SELECT ?, ?) UNION # (SELECT ? AS x, ? AS y UNION ALL SELECT ?, ?)'; # @bind = (1,2,4,5, 2,3,5,6); my ($sql, @bind) = sql_interp 'SELECT * FROM', [[1, 2], [4, 5]] # RESULT: # $sql = 'SELECT * FROM (SELECT ?, ? UNION ALL SELECT ?, ?) AS tbl0'; # @bind = (1,2,4,5); # Each result above is suitable for passing to DBI: my $res = $dbh->selectall_arrayref($sql, undef, @bind); # Besides these simple techniques shown, SQL-Interpolate includes # various optional modules to further integrate SQL::Interpolate with # DBI and streamline the syntax with source filtering and macros (see # the L section): use DBIx::Interpolate FILTER => 1; ... my $rows = $dbx->selectall_arrayref(sql[ SELECT thid, date, title, subject FROM threads WHERE date > $x AND subject IN @subjects ]); =head1 DESCRIPTION =head2 Purpose SQL::Interpolate interpolates Perl variables into SQL statements in a simplified manner. It converts a list of intermixed SQL fragments and variable references into a conventional SQL string and I suitable for passing onto DBI. When using plain DBI, one traditionally interpolates reliably with bind values, which can become unwieldy: $dbh->do(qq( INSERT INTO table (color, shape, width, height, length) VALUES(?, ?, ?, ?, ? ) ), undef, $c, $s, $w, $h, $l); This can be ameliorated somewhat with "SQL building techniques," but SQL::Interpolate eliminates much of this need with a terse Perl-like syntax: my ($sql, @bind) = sql_interp 'INSERT INTO table', {color => $c, shape => $s, width => $w, height => $h, length => $l}; $dbh->do($sql, undef, @bind); =head2 Security notes SQL::Interpolate properly binds or escapes variables. This recommended practice safeguards against "SQL injection" attacks. The L documentation has several links on the topic. =head1 INTERFACE The central function of this module is C. The rest of this distribution provides alternative interfaces, supporting functionality, and wrappers for C. =head2 C ($sql, @bind) = sql_interp @params; # functional interface ($sql, @bind) = $interp->sql_interp(@params); # OO interface C strings together the given list of elements to interpolate (@params), called the "interpolation list," and returns both an SQL string ($sql) with placeholders ("?") and a corresponding list of bind values (@bind) suitable for passing to DBI. The interpolation list can contain elements of these types (the first two are most often used): * B - string containing a raw SQL fragment such as 'SELECT * FROM mytable WHERE'. * B - scalarref, arrayref, hashref, or L object referring to data to interpolate between the SQL. * B - object that may be further expanded into previous three types of elements. Some strings may expand into macro objects. Macros are explained in L. * B - an interpolation list can be nested inside another interpolation list. This is possible with the sql() function. In addition, the first element in the interpolation list may represent state data in the form of a database handle or (for the OO interface) an instance of SQL::Interpolate. sql_interp and macros may use this state data to customize the output, such as for a dialect of SQL. The basic interpolation process is as follows. Strings are appended to the output SQL ($sql), possibly with some context-dependent tweaking. For each scalar variable reference, a corresponding placeholder ("?") and other SQL is appended to $sql, and the de-referenced value is pushed onto @bind. Nested interpolation lists are generally flattened. B # sample data to interpolate my $s = \3; # scalarref my $v = [1, 2]; # arrayref (vector) my $h = {m => 1, n => undef}; # hashref my $hv = {v => $v, s => $$s}; # hashref containing arrayref my $vv = [$v, $v]; # arrayref of arrayref my $vh = [$h, $h]; # arrayref of hashref Let $x stand for \$s, $v, $h, $hv, $vv, or $vh. B -- single bind value INPUT: 'foo', $s, 'bar' OUTPUT: 'foo ? bar', $$s B -- logical-AND INPUT: 'WHERE', $x OUTPUT: 'WHERE (m=? AND n IS NULL)', $h->{m}, # $x = $h OUTPUT: 'WHERE (v IN (?, ?) AND s = ?)', @$v, $$s # $x = $hv B -- invalid # Arrayrefs do not have a default behavior. They cannot # be used except in the special contexts described later. INPUT: 'SELECT', [1, 2, 3] # INVALID B -- list INPUT: 'WHERE x IN', $x OUTPUT: 'WHERE x IN (?, ?)', @$v # $x = $v OUTPUT: 'WHERE x IN (?)', $$s # $x = $s OUTPUT: 'WHERE 1=0' # $x = [] # Note: An arrayref of length 0 is handled specially # because 'WHERE x IN ()' can be invalid SQL (e.g. MySQL/PostgreSQL). B -- tuple INPUT: 'INSERT INTO mytable', $x OUTPUT: 'INSERT INTO mytable (m, n) VALUES(?, ?)', # $x = $h $h->{m}, $h->{n} OUTPUT: 'INSERT INTO mytable VALUES(?, ?)', @$v; # $x = $v OUTPUT: 'INSERT INTO mytable VALUES(?)', $$s; # $x = $s B -- tuple INPUT: 'UPDATE mytable SET', $h OUTPUT: 'UPDATE mytable SET m = ?, n = ?', $h->{m}, $h->{n} B -- result set INPUT: $x OUTPUT: '(SELECT ?, ? UNION ALL SELECT ?, ?)', map {@$_} @$v # $x = $vv OUTPUT: '(SELECT ? as m, ? as n UNION ALL SELECT ?, ?)', $vh->[0]->{m}, $vh->[0]->{n}, $vh->[1]->{m}, $vh->[1]->{n} # $x = $vh # Typical usage: INPUT: $x INPUT: $x, 'UNION [ALL|DISTINCT]', $x INPUT: 'INSERT INTO mytable', $x INPUT: 'SELECT * FROM mytable WHERE x IN', $x B -- table reference IN: 'SELECT * FROM', $x OUT: 'SELECT * FROM (SELECT ?, ? UNION ALL SELECT ?, ?) as t001', map {@$_} @$v # $x = $vv OUT: 'SELECT * FROM (SELECT ? as m, ? as n UNION ALL SELECT ?, ?) as temp001', $vh->[0]->{m}, $vh->[0]->{n}, $vh->[1]->{m}, $vh->[1]->{n} # $x = $vh IN: 'SELECT * FROM', $vv, 'AS t' OUT: 'SELECT * FROM (SELECT ?, ? UNION ALL SELECT ?, ?) AS t', map {@$_} @$v # Example usage (where $x and $y are table references): 'SELECT * FROM', $x, 'JOIN', $y B Whitespace is automatically added between parameters: INPUT: 'UPDATE', 'mytable SET', {x => 2}, 'WHERE y IN', \@colors; OUTPUT: 'UPDATE mytable SET x = ? WHERE y in (?, ?)', 2, @colors Variables must be passed as references (possibly using the sql// operator when source filtering is enabled); otherwise, they will processed as SQL fragments and interpolated verbatim into the result SQL string, negating the security and performance benefits of binding values. In contrast, any scalar values I an arrayref or hashref are by default treated as binding variables, not SQL. The contained elements may be also be sql_var(), sql(), or macro objects. sql_interp will Do The Right Thing(TM) on trivial cases: INPUT: 'SELECT * FROM table WHERE color IN', [] OUTPUT: 'SELECT * FROM table WHERE 1=0' # invalid to MySQL: SELECT * FROM table WHERE color IN () SQL::Interpolate does not attempt to further optimize away such expressions. Databases are designed to do query optimization, without loss of generality. Variable interpolation is context-sensitive. The same variable references can generate different SQL sub-expressions depending on context: INPUT: 'INSERT INTO mytable', $h OUTPUT: 'INSERT INTO mytable (m, n) VALUES(?, ?)', ... INPUT: 'UPDATE mytable SET', $h OUTPUT: 'UPDATE mytable SET m = ?, n = ?', ... B On error, sql_interp will croak with a string message. B An interesting property is that result sets allow passing DBI results back into the interpolation list: # DBIx::Interpolate example (note: $vv2 is identical to $vv) my $vv2 = $dbx->selectall_arrayref($dbx->selectall_arrayref($vv)); In certain cases (e.g. for INSERT and IN), a result set can be used as an alternative to a single tuple: # (tuple) INPUT: 'SELECT * FROM mytable WHERE x IN', $v OUTPUT: 'SELECT * FROM mytable WHERE x IN (?, ?)', @$v # (subselect) INPUT: 'SELECT * FROM mytable WHERE x IN', [$v] OUTPUT: 'SELECT * FROM mytable WHERE x IN (SELECT ?, ?)', @$v # (tuple) INPUT: 'INSERT INTO mytable', $v OUTPUT: 'INSERT INTO mytable VALUES (?, ?)', @$v # (subselect) INPUT: 'INSERT INTO mytable', [$v] OUTPUT: 'INSERT INTO mytable (SELECT ?, ?)', @$v =head2 C my $sqlobj = sql(@params); C creates an object of type SQL::Interpolate::SQL representing an interpolation list of the elements in @params. sql() is useful only in exceptional cases. For example, if you want insert raw SQL as a value in an arrayref or hashref, it cannot be done with a plain string because any scalar value in an arrayref or hashref is interpreted as a binding variable. An sql object must be used explicitly for force a change in context: sql_interp 'INSERT INTO mytable', {x => $x, y => sql('CURRENT_TIMESTAMP')}; # OUTPUT: 'INSERT INTO mytable (x, y) VALUES(?, CURRENT_TIMESTAMP)', $x sql_interp 'INSERT INTO mytable', [$x, sql(\$y, '*', \$z)]; # OUTPUT: 'INSERT INTO mytable VALUES(?, ? * ?)', $x, $y, $z =head2 C my $sqlvar = sql_var($value_ref, type => $sql_type, %params); C create an I object (of type SQL::Interpolate::Variable), which provides a general way to represent a binding variable I metadata. $value_ref - variable reference contained $sql_type - any DBI SQL_DATA_TYPE (e.g. SQL_INTEGER). Optional. Default is undef. Any other named parameters (%params) passed in will be saved into the object as attributes. sql_var objects are useful only in special cases where additional information should be tagged onto the variable. For example, DBI allows bind variables to be given an explicit type: my ($sql, @bind) = sql_interp 'SELECT * FROM mytable WHERE', 'x=', \$x, 'AND y=', sql_var(\$y, SQL_VARCHAR), 'AND z IN', sql_var([1, 2], SQL_INTEGER); # RESULT: @bind = # ([$x, sql_var(\$x)], [$y, sql_var(\$y, type => SQL_VARCHAR)], # [1, sql_var([1, 2], type => SQL_INTEGER)], # [2, sql_var([1, 2], type => SQL_INTEGER)]); If the interpolation list contains at least one sql_var object, then all the variable references are transparently converted into sql_var objects, and the elements of @bind take a special form: an arrayref consisting of the bind value and the sql_var object that generated the bind value. Note that a single sql_var holding an aggregate (arrayref or hashref) may generate multiple bind values. See L for example usage. =head2 C my $interp = SQL::Interpolate->new([$dbh|$filter]...); @result = $interp->sql_interp([$dbh|$filter]...); Creates a new SQL::Interpolate object, which can configure the interpolation process. The arguments can be - $dbh - zero or one DBI database handle. - $filter - zero or more SQL filters (derived from L). The OO interface often is not needed, but it is useful if you need to configure the behavior of many calls to sql_interp, such as when using some macros. =head2 C my $sql_interp = make_sql_interp(@params); # functional my $sql_interp = $interp->make_sql_interp(@params); # OO Creates a closure that wraps the sql_interp function such that the parameters passed to the sql_interp consist of @params following by the parameters passed to the closure. This function is typically used to eliminate a need to always pass in a database handle into sql_interp: my $interp = make_sql_interp($dbh); my ($sql, @bind) = $interp->(...); =head2 Exports and use parameters =over 4 =item TRACE_SQL To enable tracing on C, do use SQL::Interpolate TRACE_SQL => 1; The generated SQL statements and bind values of all C calls will be sent to STDERR. DEBUG:interp[sql=INSERT INTO mytable VALUES(?),bind=5] =item EXPORTS use SQL::Interpolate qw(:all); ':all' exports these functions: make_sql_interp, sql_interp, sql_var, and sql. =back =head1 DEPENDENCIES This module has no major dependencies. If using the optional modules, see their DEPENDENCIES sections. =head1 ADDITIONAL EXAMPLES These are more advanced examples. =head2 Preparing and reusing a statement handle my $sth; for my $href (@array_of_hashrefs) { my @list = ('SELECT * FROM mytable WHERE', $href); my ($sql, @bind) = sql_interp @list; die 'ASSERT' if $sth && $sth->{Statement} ne $sql; $sth = $dbh->prepare($sql) unless $sth; $sth->execute(@list); $sth->fetchall_arrayref(); } The above code requires that $sql never changes. If $sql does change, you would have to prepare a new statement handle. DBIx::Interpolate implements a streamlined solution that caches statement handles. =head2 Binding variables types (DBI bind_param) my ($sql, @bind) = sql_interp 'SELECT * FROM mytable WHERE', 'x=', \$x, 'AND y=', sql_var(\$y, SQL_VARCHAR), 'AND z IN', sql_var([1, 2], SQL_INTEGER); # RESULT: # @bind = ([$x, sql_var(\$x)], [$y, sql_var(\$y, type => SQL_VARCHAR)], # [1, sql_var([1, 2], type => SQL_INTEGER)], # [2, sql_var([1, 2], type => SQL_INTEGER)]); die 'ASSERT' if ref $bind[0] ne 'ARRAY'; my $sth = $dbh->prepare($sql); my $idx = 1; for my $var (@bind) { $sth->bind_param($idx++, $var->[0], $var->[1]->{type}); } $sth->execute(); my $ret = $sth->selectall_arrayref(); This kludge is similar to the approach in L. DBIx::Interpolate provides a simpler way of handling bind_type. =head1 DESIGN NOTES The section covers the design choices used in this module. =head2 Philosophy and requirements These principles have guided the design of SQL-Interpolate. B. (Mark Stosberg) SQL-Interpolate has one central function, sql_interp, which is relatively simple and reusable in itself such that all other functionality is built upon it. Complicated (macro) and less robust (source filtering) capabilities have been extracted out of the core module and into optional modules. Source filtering, for example, is optional and off by default since many distrust source filtering and fear it will cause bugs that are especially difficult to debug because "it's not Perl anymore." B. This module necessarily imposes I overhead, largely due to the added string and regex processing. The author has not quantified this overhead but expects it to be low compared to database concerns such as disk access and query processing and network concerns such as latency. It may be possible to avoid rerunning C when only the binding variables change (e.g. my ($sql, $bindobj) = sql_interp(...); @bind = $bindobj->(x => 1); @bind = $bindobj->(x => 2)), but this is probably does not provide much benefit. B. There are other modules (such as SQL::Abstract) that abstract the SQL language behind either object-oriented (OO) method calls and/or Perl data structures (hashes and arrays). The former may be undesirable in some cases since it replaces one language with another and hides the full capabilities and expressiveness of your database's native SQL language. The latter may load too much meaning into the syntax of "{, "[" and "\" thereby rendering the meaning less clear: SQL::Abstract example: %where = (lname => {like => '%son%'}, age => [-and => {'>=', 10}, {'<=', 20}]) Plain SQL: "lname LIKE '%son' AND (age >= 10 AND age <= 20)" In contrast, SQL::Interpolate does not abstract away your SQL but rather makes it easier to interpolate Perl variables into your SQL. Now, SQL::Interpolate I load some meaning into "{, "[" and "\", but we try to limit its use to obvious cases as justified below. Since your raw SQL is exposed, you can use your particular dialect of SQL, assuming the variable interpolation rules are relatively normal. Database independence is a worthy goal, but it can be quite difficult to achieve and is beyond the scope of SQL::Interpolate (though you might wish to build such features on-top-of SQL::Interpolate). B The syntax is intended to be natural and terse for the most common cases. This is demonstrated in the examples. Now, it may be a bit inconsistent that a hashref has two meanings. The hashref in ('WHERE', \%hash) represents a logical AND-equal construction, whereas the hashref in ('INSERT INTO mytable', \%hash) and ('UPDATE mytable SET', \%hash) represents a tuple or portion of it. However, there is little ambiguity since a swap of the two meanings results in illogical statements. There is a limited number of plausible meanings and these constructions, and these two are the most common and useful ones in practice. Admittedly, the former case might alternately be understood as an logical OR (rather than AND) construction, but it the AND construction is more common in a WHERE clause and a natural "Do What I Mean." (Similarly, a query "Perl MySQL" posed to a search engine usually implies "Perl AND MySQL" not "Perl OR MySQL.) In the latter interpretation of \%hash, the hashref very well models a tuple that is more named rather than ordered. Using an arrayref [x => $x, y => $y] rather than a hashref for the AND'ed construction could work just as well, and it allows duplicate keys and non-scalar keys. However, SQL::Interpolate reserves [...] for future use. SQL::Interpolate interprets an arrayref inside a hashref such as {x => \@y, ...} as an 'x IN y' construction. This was independently suggested by a number of people and unlikely to be confused with the 'x = y' since and x and y have different dimensions (one is scalar and the other is a vector). It may be a bit inconsistent that scalars inside hashrefs and arrayrefs are interpreted as binding variables rather than SQL as is the case outside. 'WHERE', 'x = ', \$x # variables outside must be referenced 'WHERE', {x => $x} # variables inside should not be referenced 'WHERE', [$x,$y] # variables inside should not be referenced However, this not too much a stretch of logicality, and the alternatives are not pretty and do not satisfy the commonest case. Consider: 'WHERE', {x => \$x, y => \$y, z => 'CURRENT_TIMESTAMP'} 'WHERE x IN', [\1, \2, \3] 'WHERE x IN', \\@colors # double referencing Exceptions from the commonest case require C. =head2 Limitations / characteristics This module is still a bit under development, so interfaces could change some, particularly in the more esoteric features. Still, it is expected you will find this module quite stable, robust, tested, simple, flexible, and well documented. If you're new to this module, it's a good idea to examine the generated SQL (e.g. the TRACE_SQL option) to ensure you're getting what you think you're getting. Be careful to reference the variables you interpolate to prevent SQL injection (see discussion in L). This module does not parse your SQL fragments except to the extent required for variable interpolation, so it does not guarantee that the generated SQL is valid but leaves that responsibility to your database. This is intentional so that it will works well even with non-standard SQL dialects. Some types of interpolation are context-sensitive and involve examination of your SQL fragments. The examination could fail on obscure syntax, but it is often simple and robust. Look at the examples to see the types of interpolation that are accepted, and if doubt, examine the SQL output yourself (use Data::Dumper to examine the output of sql_interp or enable the TRACE_SQL option) or look at the source code of sql_interp. If needed, you can disable context sensitivity by inserting a null-string before a variable. A few things are just not possible with the ('WHERE', \%hashref) syntax, so in such case, use a more direct syntax: # ok--direct syntax sql_interp '...WHERE', {x => $x, y => $y}, 'AND y = z'; # bad--trying to impose a hashref but keys must be scalars and be unique sql_interp '...WHERE', {sql_var(\$x) => sql('x'), y => $y, y => sql('z')}; In the cases where this module parses or generates SQL fragments, this module should work for many databases, but its been tested mostly on MySQL and PostgreSQL. Please information the author of any incompatibilities. =head2 Proposed enhancements The following enhancements to SQL::Interpolate have been proposed (not all may be implemented). The most important suggestions are generally listed at top. A tutorial could be useful. (Wojciech) This might be useful: IN: 'bla IN', [undef, 2] OUT: '(blah IS NULL OR bla IN (?))', 2 # possilby also: {bla => [undef, 2]} Possibly support result sets inside the hashref logical-AND construct: IN: {x => [[1],[2]]} OUT: 'x IN (SELECT ? UNION ALL SELECT ?)', 1, 2 Possibly support naming columns on table references when expressed as an arrayref of arrayrefs. Denoting this might require a typed object: IN: 'SELECT * FROM', sql_table($vv, ['x', 'y']) OUT: 'SELECT * FROM (SELECT ? AS x, ? AS y UNION ALL SELECT ?, ?) as t001', map {@$_} @$v The following additional type of INSERT might be supported (markt): # possible syntax IN: 'INSERT INTO temp (id,val)', [1,2] IN: 'INSERT INTO temp', sql_row([1, 2], ['id', 'val']) IN: 'INSERT INTO temp', sql_row(id => 1, val => 2) # note: an equivalent is currently possible with result sets: IN: 'INSERT INTO temp (id,val)', [[1,2]] OUT: 'INSERT INTO temp (id,val) (SELECT 1, 2)' Possibly support MySQL and DB2 (any other databases?) style multi-row INSERT syntax (unlikely): # MySQL INSERT INTO table (a,b,c) VALUES (1,2,3),(4,5,6) # But this might not be needed since it can be done already # with result sets (e.g. MySQL and PostgreSQL): INSERT INTO table (SELECT 1,2,3 UNION SELECT 4,5,6) Support for placeholders might be added for cases when placing the variable references in-line is inconvenient or not desired (similar to SQL::String): IN: 'SELECT * FROM mytable WHERE x = ? and y = ?', \$x, \$y OUT: 'SELECT * FROM mytable WHERE x = ? and y = ?', $x, $y IN: 'SELECT * FROM mytable WHERE x = :x and y = :y', {x => $x, y => $y} OUT: 'SELECT * FROM mytable WHERE x = ? and y = ?', $x, $y Similarly, named placeholders might be supported (possibly via sql_var()): 'SELECT * FROM mytable WHERE', 'x=', sql_var('color'), 'and', {val => sql_var('weight')}, sql_val(weight => 100), sql_val(color => 'blue') Another idea to to add some methods to the sql() objects that allow sql_interp to be called in a more object-oriented manner (like in SQL::String): my @bind = sql('WHERE x=', \$x)->get_bind(); my $sql = sql('WHERE x=', \$x)->get_sql(); However, it's not certain that those methods would be used that much. Possibly allow context to be explicitly specified: IN: 'TABLEREF', [[1]] OUT: '(SELECT ?) as tbl0', 1 IN: 'RESULTSET', [[1]] OUT: '(SELECT ?)', 1 IN: 'ANDCLAUSE', {x => 1} OUT: '(x = ?)', 1 IN: 'TUPLE', {x => 1} OUT: 'x = ?', 1 # or possibly use macros IN: sql_tableref( [[1]] ) OUT: '(SELECT ?) as tbl0', 1 sql_and and sql_or macros might support hashrefs as well (the former for clarity and the latter for terseness): 'SELECT * FROM mytable WHERE', {x => 2, y => 5} 'SELECT * FROM mytable WHERE', sql_and {x => 2, y => 5} # same as above 'SELECT * FROM mytable WHERE', sql_or {x => 2, y => 5} Logical operations might be supported as follows (e.g. junctions). This might be rejected due to unintuitive syntax. 'SELECT * FROM mytable WHERE', { x => sql_not(2), # x <> 2 y => 5 } 'SELECT * FROM mytable WHERE', sql_or { # might be difficult to read x => sql_or(2, 3, sql_and(4, 5)), y => 5 } # or 'SELECT * FROM mytable WHERE', { sql_not(x => 2), # x <> 2 sql_lt(y => 5), # y < 5 } Support for tuples (e.g. MySQL/PostgreSQL) might be added, but this is probably too uncommon to be worthwhile to implement: INPUT: 'SELECT * FROM edge WHERE', {'(sid, did)' => [5, 1]} OUTPUT: 'SELECT * FROM edge WHERE (sid, did) = (?, ?)', 5, 1 INPUT: 'SELECT * FROM edge WHERE', {'(sid, did)' => [[5, 1], [2, 3]]} OUTPUT: 'SELECT * FROM edge WHERE (sid, did) IN ((5,1),(2,3))', 5, 1, 2, 3 # MySQL (not PostgreSQL) OUTPUT: 'SELECT * FROM edge WHERE (sid, did) IN (SELECT ?,? UNION ALL ?,?)', 5, 1, 2, 3 # MySQL or PostgreSQL =head2 Implementation notes Oracle (unlike MySQL) does not allow 'WHERE id = 5 and 1' nor 'WHERE id = 5 or 0'. SQL::Interpolate therefore generates the more portable 'WHERE id = 5 and 1=1' and 'WHERE id = 5 or 1=0'. =head1 CONTRIBUTORS David Manura (L) (author). Feedback incorporated from Mark Stosberg (L) (recommended simplifying the core module, simplified the docs, and provided a bunch of other highly useful feedback), Mark Tiefenbruck (syntax), Wojciech Pietron (Oracle compat), Jim Chromie (DBIx::Interpolate idea), Juerd Waalboer, Terrence Brannon (early feedback), and others. =head1 FEEDBACK Bug reports and comments on the design are most welcome. rt.cpan.org can be used for specific bug reporting, but you may also use the discussion list (L) or contact me directly (L). See also the project page at L. =head1 LEGAL Copyright (c) 2003-2005, David Manura. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself. See L. =head1 SEE ALSO =head2 Other modules in this distribution L extends this module slightly, allowing certain DBI methods to accept an C-like interpolation list rather than the traditional ($statement, \%attr, @bind_values)-like parameter list. L streamlines the the SQL::Interpolate syntax with direct SQL interpolation analogous to Perl string interpolation. L provides an assortment of macros and SQL filters, which allow you to write simpler, more robust, and possibly more portable queries. =head2 Related modules L shares with C the purpose of making SQL generation easier. SQL::Abstract differs in that it expresses queries in terms of OO method calls. It's syntax may impair readability because it uses the subtle difference between a brace and bracket to denote the difference between AND and OR in a query (the user can change whether a bracket implies "AND" or "OR"). Some complex where clauses are difficult or impossible with L. SQL::Interpolate gives the author more direct access to the underlying SQL. This permits using the full expressivity of the database query language. L strives to simplify SQL generation as well as the data structures returned from L. C and the related modules don't try to handle C's results at all. C currently can use L to help generate SQL, and it may be possible in the future for the same to be done with C. L is a popular "complete" solution for abstract database access through an OO interface. It currently has a plugin called L that allows it to use C to generate SQL. It's possible that C could be integrated with it as well. L provides an "embedded SQL" approach where the Perl and SQL languages are extended (via source filtering) to support interwoven Perl and SQL. The syntax supports interpolating Perl variables into SQL and passing query results back into Perl variables. In contrast, SQL::Interpolate extends neither SQL nor Perl (except with the optional source filtering module), and it deals only with interpolating Perl variables into queries, whereas returning variables from queries is the job of something like DBI, DBIx::Interpolate, or DBIx::Simple. L shares a number of similiarities to SQL::Interpolate but it is more rudimentary. Both let you combine "chunks" of SQL that have their parameters attached to them and then transform it into an SQL string and list of bind parameters suitable for passing to DBI. =head2 Related resources SQL Interpolate Project Page: L . Full example code - Meset::MessageBoard in Meset (L). =cut