package SQL::Interp; our $VERSION = '1.10'; use strict; use warnings; use Carp; use base 'Exporter'; our @EXPORT; our %EXPORT_TAGS = (all => [qw( sql_interp sql_type sql )]); our @EXPORT_OK = @{ $EXPORT_TAGS{all} }; # whether TRACE_SQL is enabled my $trace_sql_enabled = $ENV{TRACE_SQL} || 0; # regexes my $id_match = qr/[a-zA-Z_][a-zA-Z0-9_\$\.]*/; my $table_name_match = $id_match; # next ID to use for table alias # [local to sql_interp functions] my $alias_id = 0; # current index in interpolation list # [local to sql_interp functions] my $idx = 0; # current interpolation list # [local to sql_interp functions] my $items_ref = undef; # whether typed sql_type() ever used (if so, # format of @bind result is more complicated) # [local to sql_interp functions] my $is_var_used = 0; # state item (SQL::Iterpolate or DBI handle) used in interpolation. # [local to sql_interp functions] my $state = undef; # bind elements in interpolation # [local to sql_interp functions] my @bind; sub import { my $class = shift; my @params = @_; # process any special "use" parameters my $is_wrapped = 0; # whether module wrapped # (e.g. by DBIx::Interp) my %action_for = ( TRACE_SQL => sub { $trace_sql_enabled = shift @params; print STDERR "TRACE_SQL enabled\n" if $trace_sql_enabled; }, __WRAP => sub { $is_wrapped = shift @params; } ); @_ = ($class); # unprocessed params while (my $item = shift @params) { my $action = $action_for{$item}; if ($action) { $action->(); } else { push @_, $item; } } # handle exports my $level = $is_wrapped ? 2 : 1; __PACKAGE__->export_to_level($level, @_); return; } # only used by DBIx::Interp, so not further documented here sub new { my $class = shift; # process special params. my $dbh; while (ref $_[0] ne '') { if (UNIVERSAL::isa($_[0], 'DBI::db')) { $dbh = shift; } } my %params = @_; # build object my $self = bless { dbh => $dbh, }, $class; return $self; } # note: sql_interp is not reentrant. sub sql_interp { my @items = @_; # clear call state $alias_id = 0; $idx = 0; $items_ref = undef; $is_var_used = 0; $state = undef; @bind = (); # extract state item (if any) my $interp; if (UNIVERSAL::isa($items[0], 'SQL::Interp')) { $state = $interp = $items[0]; } elsif (UNIVERSAL::isa($items[0], 'DBI::db')) { $state = $items[0]; } shift @items if $state; $items_ref = \@items; # interpolate! my $sql = _sql_interp(@items); # convert bind values to complex format (if needed) if ($is_var_used) { for my $val (@bind) { my $valcopy = $val; ! ref $val and $val = [$val, sql_type(\$valcopy)]; } } $trace_sql_enabled and print STDERR "DEBUG:interp[sql=$sql,bind=" . join(':', @bind) . "]\n"; return ($sql, @bind); } # helper called by sql_interp() # @items - interpolation list sub _sql_interp { my (@items) = @_; my $sql = ''; foreach my $item (@items) { my $varobj; my $bind_size = @bind; if (ref $item eq 'SQL::Interp::Variable') { unless (keys %$item == 1 && defined($item->{value})) { $varobj = $item; $is_var_used = 1; } $item = $item->{value}; } if (ref $item eq 'SQL::Interp::SQL') { my ($sql2, @bind2) = _sql_interp(@$item); $sql .= ' ' if $sql ne ''; $sql .= $sql2; push @bind, @bind2; } elsif (ref $item) { if ($sql =~ /\b(NOT\s+)?IN\s*$/si) { my $not = quotemeta($1 || ''); $item = [ $$item ] if ref $item eq 'SCALAR'; # allow double references $item = $$item if ref $item eq 'REF' ; if (ref $item eq 'ARRAY') { if (@$item == 0) { my $dummy_expr = $not ? '1=1' : '1=0'; $sql =~ s/$id_match\s+${not}IN\s*$/$dummy_expr/si or croak 'ASSERT'; } else { $sql .= " (" . join(', ', map { _sql_interp_data($_); } @$item) . ")"; } } else { _error_item($idx, \@items); } } elsif ($sql =~ /\b(?:ON\s+DUPLICATE\s+KEY\s+UPDATE|SET)\s*$/si && ref $item eq 'HASH') { _error('Hash has zero elements.') if keys %$item == 0; $sql .= " " . join(', ', map { my $key = $_; my $val = $item->{$key}; "$key=" . _sql_interp_data($val); } (sort keys %$item)); } elsif ($sql =~ /\b(REPLACE|INSERT)[\w\s]*\sINTO\s*$id_match\s*$/si) { $item = [ $$item ] if ref $item eq 'SCALAR'; if (ref $item eq 'ARRAY') { $sql .= " VALUES(" . join(', ', map { _sql_interp_data($_); } @$item) . ")"; } elsif (ref $item eq 'HASH') { my @keyseq = sort keys %$item; $sql .= " (" . join(', ', @keyseq) . ")" . " VALUES(" . join(', ', map { _sql_interp_data($item->{$_}); } @keyseq) . ")"; } else { _error_item($idx, \@items); } } elsif ($sql =~ /(?:\bFROM|JOIN)\s*$/si) { # table reference # get alias for table my $table_alias = undef; # alias given to table my $next_item = $items[$idx + 1]; if(defined $next_item && ref $next_item eq '' && $next_item =~ /\s*AS\b/is) { $table_alias = undef; # provided by client } else { $table_alias = 'tbl' . $alias_id++; } $sql .= ' ' unless $sql eq ''; $sql .= _sql_interp_resultset($item); $sql .= " AS $table_alias" if defined $table_alias; } elsif (ref $item eq 'SCALAR') { push @bind, $$item; $sql .= ' ?'; } elsif (ref $item eq 'HASH') { # e.g. WHERE {x = 3, y = 4} if (keys %$item == 0) { $sql .= ' 1=1'; } else { my $s = join ' AND ', map { my $key = $_; my $val = $item->{$key}; if (! defined $val) { "$key IS NULL"; } elsif (ref $val eq 'ARRAY') { _sql_interp_list($key, $val); } else { "$key=" . _sql_interp_data($val); } } (sort keys %$item); $s = "($s)" if keys %$item > 1; $s = " $s"; $sql .= $s; } } elsif (ref $item eq 'ARRAY') { # result set $sql .= ' ' unless $sql eq ''; $sql .= _sql_interp_resultset($item); } else { _error_item($idx, \@items); } } else { $sql .= ' ' unless $sql =~ /(^|\s)$/ || $item =~ /^\s/; # style $sql .= $item; } # attach $varobj to any bind values it generates if ($varobj) { my $num_pushed = @bind - $bind_size; for my $val (@bind[-$num_pushed..-1]) { $val = [$val, $varobj]; } } $idx++; } return $sql; } # sql_interp helper function. # Interpolate data element in aggregate variable (hashref or arrayref). # $ele - raw input element from aggregate. # returns $sql sub _sql_interp_data { my ($ele) = @_; if (ref $ele) { # e.g. sql() my ($sql2, @bind2) = _sql_interp($ele); push @bind, @bind2; $is_var_used = 1 if ref $bind2[0]; return $sql2; } else { push @bind, $ele; return '?'; } } # sql_interp helper function to interpolate "key IN list", # assuming context ("WHERE", {key => $list, ...}). sub _sql_interp_list { my ($key, $list) = @_; if (@$list == 0) { return "1=0"; } else { my @sqle; for my $ele (@$list) { my $sqle = _sql_interp_data($ele); push @sqle, $sqle; } my $sql2 = $key . " IN (" . join(', ', @sqle) . ")"; return $sql2; } } # sql_interp helper function to interpolate result set, # e.g. [[1,2],[3,4]] or [{a=>1,b=>2},{a=>3,b=>4}]. sub _sql_interp_resultset { my($item) = @_; my $sql = ''; if (ref $item eq 'ARRAY') { _error("table reference has zero rows") # improve? if @$item == 0; my $sql2 = ''; if(ref $item->[0] eq 'ARRAY') { _error("table reference has zero columns") # improve? if @{ $item->[0] } == 0; for my $row ( @$item ) { my $is_first_row = ($sql2 eq ''); $sql2 .= ' UNION ALL ' unless $is_first_row; $sql2 .= "SELECT " . join(', ', map { _sql_interp_data($_) } @$row); } } elsif(ref $item->[0] eq 'HASH') { _error("table reference has zero columns") # improve? if keys %{ $item->[0] } == 0; my $first_row = $item->[0]; for my $row ( @$item ) { my $is_first_row = ($sql2 eq ''); $sql2 .= ' UNION ALL ' unless $is_first_row; $sql2 .= "SELECT " . join(', ', map { my($key, $val) = ($_, $row->{$_}); my $sql3 = _sql_interp_data($val); $sql3 .= " AS $key" if $is_first_row; $sql3; } (sort keys %$first_row)); } } else { _error_item($idx, $items_ref); } $sql .= ' ' unless $sql eq ''; $sql .= "($sql2)"; } else { _error_item($idx, $items_ref); } return $sql; } sub sql { return SQL::Interp::SQL->new(@_); } sub sql_type { return SQL::Interp::Variable->new(@_); } # helper function to throw error sub _error_item { my ($idx, $items_ref) = @_; my $prev = $idx > 0 ? $items_ref->[$idx-1] : undef; my $prev_text = defined($prev) ? " following '$prev'" : ""; my $cur = $items_ref->[$idx]; _error("SQL::Interp error: Unrecognized " . "'$cur'$prev_text in interpolation list."); return; } sub _error { croak "SQL::Interp error: $_[0]"; } # This shall only be called by DBIx::Interp. sub _use_params { scalar(caller()) eq 'DBIx::Interp' or die 'ASSERT'; # supported use parameters. return qw(TRACE_SQL); } 1; package SQL::Interp::Variable; use strict; use Carp; sub new { my ($class, $value, %params) = @_; SQL::Interp::_error( "Value '$value' in sql_type constructor is not a reference") if ! ref $value; my $self = bless {value => $value, %params}, $class; return $self; } 1; package SQL::Interp::SQL; use strict; use Carp; use overload '.' => \&concat, '""' => \&stringify; sub new { my ($class, @list) = @_; my $self = \@list; bless $self, $class; return $self; } # Concatenate SQL object with another expression. # An SQL object can be concatenated with another SQL object, # variable reference, or an SQL string. sub concat { my ($a, $b, $inverted) = @_; my @params = ( @$a, ref $b eq __PACKAGE__ ? @$b : $b ); @params = reverse @params if $inverted; my $o = SQL::Interp::SQL->new(@params); return $o; } sub stringify { my ($a) = @_; return $a; } 1; __END__ =head1 NAME SQL::Interp - Interpolate Perl variables into SQL statements =head1 SYNOPSIS use SQL::Interp ':all'; my ($sql, @bind) = sql_interp 'INSERT INTO table', \%item; my ($sql, @bind) = sql_interp 'UPDATE table SET', \%item, 'WHERE y <> ', \2; my ($sql, @bind) = sql_interp 'DELETE FROM table WHERE y = ', \2; # These two select syntax produce the same result my ($sql, @bind) = sql_interp 'SELECT * FROM table WHERE x = ', \$s, 'AND y IN', \@v; my ($sql, @bind) = sql_interp 'SELECT * FROM table WHERE', {x => $s, y => \@v}; =head1 DESCRIPTION SQL::Interp converts a list of intermixed SQL fragments and variable references into a conventional SQL string and I suitable for passing onto DBI. This simple technique creates database calls that are simpler to create and easier to read, while still giving you full access to custom SQL. SQL::Interp properly binds or escapes variables. This recommended practice safeguards against "SQL injection" attacks. The L documentation has several links on the topic. Besides the simple techniques shown above, The SQL-Interpolate distribution includes the optional L module, which integrates with DBI: use DBIx::Interp ':all'; ... my $rows = $dbx->selectall_arrayref(" SELECT title FROM threads WHERE date > ",\$x," AND subject IN ",\@subjects ); Since DBIx::Interp still allows you complete access to the DBI API, using it as wrapper is recommended for most applications. =head1 The One Function You Really Need =head2 C ($sql, @bind) = sql_interp @params; C is the one central function you need to know. C strings together the given list of elements and returns both an SQL string ($sql) with placeholders ("?") and a corresponding list of bind values (@bind) suitable for passing to DBI. The interpolation list can contain elements of these types: * B - string containing a raw SQL fragment such as 'SELECT * FROM mytable WHERE'. * B - scalarref, arrayref, hashref, or A L object referring to data to interpolate between the SQL. * B - an interpolation list can be nested inside another interpolation list. This is possible with the L function. B The following variable names will be used in the below examples: $sref = \3; # scalarref $aref = [1, 2]; # arrayref $href = {m => 1, n => undef}; # hashref $hv = {v => $v, s => $$s}; # hashref containing arrayref $vv = [$v, $v]; # arrayref of arrayref $vh = [$h, $h]; # arrayref of hashref Let $x stand for any of these. =head3 Default scalarref behavior A scalarref becomes a single bind value. IN: 'foo', $sref, 'bar' OUT: 'foo ? bar', $$sref =head3 Default hashref behavior A hashref becomes a logical AND IN: 'WHERE', $href OUT: 'WHERE (m=? AND n IS NULL)', $h->{m}, IN: 'WHERE', $hv OUT: 'WHERE (v IN (?, ?) AND s = ?)', @$v, $$s =head3 Default arrayref of (hashref or arrayref) behavior I IN: $vv OUT: '(SELECT ?, ? UNION ALL SELECT ?, ?)', map {@$_} @$v IN: $vh OUT: '(SELECT ? as m, ? as n UNION ALL SELECT ?, ?)', $vh->[0]->{m}, $vh->[0]->{n}, $vh->[1]->{m}, $vh->[1]->{n} # Typical usage: IN: $x IN: $x, 'UNION [ALL|DISTINCT]', $x IN: 'INSERT INTO mytable', $x IN: 'SELECT * FROM mytable WHERE x IN', $x =head3 Context ('IN', $x) A scalarref or arrayref can used to form an "IN" clause. As a convenience, a reference to an arrayref is also accepted. This way, you can simply provide a reference to a value which may be a single-valued scalar or a multi-valued arrayref. IN: 'WHERE x IN', $aref OUT: 'WHERE x IN (?, ?)', @$aref IN: 'WHERE x IN', $sref OUT: 'WHERE x IN (?)', $$sref IN: 'WHERE x IN', [] OUT: 'WHERE 1=0' IN: 'WHERE x NOT IN', [] OUT: 'WHERE 1=1' =head3 Context ('INSERT INTO tablename', $x) IN: 'INSERT INTO mytable', $href OUT: 'INSERT INTO mytable (m, n) VALUES(?, ?)', $href->{m}, $href->{n} IN: 'INSERT INTO mytable', $aref OUT: 'INSERT INTO mytable VALUES(?, ?)', @$aref; IN: 'INSERT INTO mytable', $sref OUT: 'INSERT INTO mytable VALUES(?)', $$sref; MySQL's "REPLACE INTO" is supported the same way. =head3 Context ('SET', $x) IN: 'UPDATE mytable SET', $href OUT: 'UPDATE mytable SET m = ?, n = ?', $href->{m}, $href->{n} MySQL's "ON DUPLICATE KEY UPDATE" is supported the same way. =head3 Context ('FROM | JOIN', $x) I IN: 'SELECT * FROM', $vv OUT: 'SELECT * FROM (SELECT ?, ? UNION ALL SELECT ?, ?) as t001', map {@$_} @$v IN: 'SELECT * FROM', $vh OUT: 'SELECT * FROM (SELECT ? as m, ? as n UNION ALL SELECT ?, ?) as temp001', $vh->[0]->{m}, $vh->[0]->{n}, $vh->[1]->{m}, $vh->[1]->{n} IN: 'SELECT * FROM', $vv, 'AS t' OUT: 'SELECT * FROM (SELECT ?, ? UNION ALL SELECT ?, ?) AS t', map {@$_} @$v # Example usage (where $x and $y are table references): 'SELECT * FROM', $x, 'JOIN', $y =head3 Preparing and reusing a statement handle The following code reuses a statement handle and even reprepares the statement handle if the SQL changes. L provides a streamlined solution that transparently caches statement handles. my $sth; for my $href (@array_of_hashrefs) { my @list = ('SELECT * FROM mytable WHERE', $href); my ($sql, @bind) = sql_interp @list; if (! defined $sth || $sth->{Statement} ne $sql) { $sth = $dbh->prepare($sql); } $sth->execute(@bind); $sth->fetchall_arrayref(); } =head3 Other Rules Whitespace is automatically added between parameters: IN: 'UPDATE', 'mytable SET', {x => 2}, 'WHERE y IN', \@colors; OUT: 'UPDATE mytable SET x = ? WHERE y in (?, ?)', 2, @colors Variables must be passed as references; otherwise, they will processed as SQL fragments and interpolated verbatim into the result SQL string, negating the security and performance benefits of binding values. In contrast, any scalar values I an arrayref or hashref are by default treated as binding variables, not SQL. The contained elements may be also be L or L. =head1 A Couple Helper Functions You Sometimes Need =head2 C sql_interp 'INSERT INTO mytable', {x => $x, y => sql('CURRENT_TIMESTAMP')}; # OUT: 'INSERT INTO mytable (x, y) VALUES(?, CURRENT_TIMESTAMP)', $x sql() is useful if you want insert raw SQL as a value in an arrayref or hashref. =head2 C my $sqlvar = sql_type($value_ref, type => $sql_type, %params); C provides a general way to represent a binding variable I metadata. It is necessary in rare applications which you need to explicity give the bind type of a SQL variable. $value_ref - variable reference contained $sql_type - any DBI SQL_DATA_TYPE (e.g. SQL_INTEGER). Optional. Default is undef. Any other named parameters (%params) passed in will be saved into the object as attributes. sql_type objects are useful only in special cases where additional information should be tagged onto the variable. For example, DBI allows bind variables to be given an explicit type: my ($sql, @bind) = sql_interp 'SELECT * FROM mytable WHERE', 'x=', \$x, 'AND y=', sql_type(\$y, SQL_VARCHAR), 'AND z IN', sql_type([1, 2], SQL_INTEGER); # RESULT: @bind = # ([$x, sql_type(\$x)], [$y, sql_type(\$y, type => SQL_VARCHAR)], # [1, sql_type([1, 2], type => SQL_INTEGER)], # [2, sql_type([1, 2], type => SQL_INTEGER)]); my $idx = 1; for my $var (@bind) { $sth->bind_param($idx++, $var->[0], $var->[1]->{type}); } $sth->execute(); my $ret = $sth->selectall_arrayref(); If the interpolation list contains at least one sql_type object, then all the variable references are transparently converted into sql_type objects, and the elements of @bind take a special form: an arrayref consisting of the bind value and the sql_type object that generated the bind value. Note that a single sql_type holding an aggregate (arrayref or hashref) may generate multiple bind values. =head1 Enabling debugging output To have the generated SQL and bind variables sent to STDOUT, you can set the environment variable C to "1" TRACE_SQL=1 perl my_script.pl Alternatively, you can do this: use SQL::Interp TRACE_SQL => 1; Here's some example output: DEBUG:interp[sql=INSERT INTO mytable VALUES(?),bind=5] =head1 Philosophy B. There are other modules, such as L, that hide SQL behind method calls and/or Perl data structures (hashes and arrays). The former may be undesirable in some cases since it replaces one language with another and hides the full capabilities and expressiveness of your database's native SQL language. The latter may load too much meaning into the syntax of "{, "[" and "\" thereby rendering the meaning less clear: SQL::Abstract example: %where = (lname => {like => '%son%'}, age => [-and => {'>=', 10}, {'<=', 20}]) Plain SQL: "lname LIKE '%son' AND (age >= 10 AND age <= 20)" In contrast, SQL::Interp does not abstract away your SQL but rather makes it easier to interpolate Perl variables into your SQL. Now, SQL::Interp I load some meaning into "{, "[" and "\", but we try to limit its use to obvious cases. Since your raw SQL is exposed, you can use your particular dialect of SQL. =head1 Limitations Some types of interpolation are context-sensitive and involve examination of your SQL fragments. The examination could fail on obscure syntax, but it is generally robust. Look at the examples to see the types of interpolation that are accepted, and if doubt, examine the SQL output yourself with the TRACE_SQL option. If needed, you can disable context sensitivity by inserting a null-string before a variable. "SET", "", \$x A few things are just not possible with the ('WHERE', \%hashref) syntax, so in such case, use a more direct syntax: # ok--direct syntax sql_interp '...WHERE', {x => $x, y => $y}, 'AND y = z'; # bad--trying to impose a hashref but keys must be scalars and be unique sql_interp '...WHERE', {sql_type(\$x) => sql('x'), y => $y, y => sql('z')}; In the cases where this module parses or generates SQL fragments, this module should work for many databases, but its been tested mostly on MySQL and PostgreSQL. Please inform the author of any incompatibilities. =head1 Contributors David Manura (L) (author). Mark Stosberg (L) created and maintains the SQL::Interp fork. Also thanks to: Mark Tiefenbruck (syntax), Wojciech Pietron (Oracle compat), Jim Chromie (DBIx::Interp idea), Juerd Waalboer, Terrence Brannon (early feedback), and others. =head1 Bug Reporting Use rt.cpan.org for bug reports. =head1 License Copyright (c) 2003-2005, David Manura. This module is free software. It may be used, redistributed and/or modified under the same terms as Perl itself. See L. =head1 See Also =head2 Fork This module was forked from L, around version 0.40. The core functionality remains unchanged, but the following incompatible changes have been made: =over 4 =item * The optional source filtering feature was removed. =item * The optional "macro" feature was removed. =item * A legacy, deprecated function "sql_literal" was removed. =item * The docs were overhauled to be simpler and clearer. =back So if you want those removed features, you should use L. I used it for years without those optional features and never missed them. =head2 Other modules in this distribution L allows DBI methods to accept an C-like interpolation list rather than the traditional ($statement, \%attr, @bind_values) parameter list. =head2 Related modules =head3 SQL::Abstract L shares with C the purpose of making SQL generation easier. SQL::Abstract differs in that it expresses queries in terms of OO method calls. It's syntax may impair readability because it uses the subtle difference between a brace and bracket to denote the difference between AND and OR in a query (the user can change whether a bracket implies "AND" or "OR"). Some complex where clauses are difficult or impossible with L. SQL::Interp gives the author more direct access to the underlying SQL. This permits using the full expressivity of the database query language. =head3 DBIx::Simple L strives to simplify SQL generation as well as the data structures returned from L. C currently can use L to help generate SQL. =head3 Class::DBI L is a popular "complete" solution for abstract database access through an OO interface. It currently has a plugin called L that allows it to use C to generate SQL. It's possible that C could be integrated with it as well. =head3 SQL::Preproc L provides an "embedded SQL" approach where the Perl and SQL languages are extended (via source filtering) to support interwoven Perl and SQL. The syntax supports interpolating Perl variables into SQL and passing query results back into Perl variables. In contrast, SQL::Interp extends neither SQL nor Perl, and it deals only with interpolating Perl variables into queries, whereas returning variables from queries is the job of something like DBI, DBIx::Interp, or DBIx::Simple. =head3 SQL::String L shares a number of similiarities to SQL::Interp but it is more rudimentary. Both let you combine "chunks" of SQL that have their parameters attached to them and then transform it into an SQL string and list of bind parameters suitable for passing to DBI. =head3 SQL::KeywordSearch L helps generate SQL for a keyword search. It can return the result in a format compatible with L to become part of a larger query. =cut