File Coverage

File: lib/DBIx/SchemaChecksum.pm
Coverage: 73.4%

line  stmt  bran  cond  sub  time  code
1package DBIx::SchemaChecksum;
2
3
12
12
12
502833
30
343
use 5.010;
4
12
12
12
2315
5155350
194
use Moose;
5
12
12
12
123627
5224
157
use version; our $VERSION = version->new('0.28');
6
7
12
12
12
8670
76186
512
use DBI;
8
12
12
12
2558
3985
332
use Digest::SHA1;
9
12
12
12
3015
36595
456
use Data::Dumper;
10
12
12
12
1433
148846
389
use Path::Class;
11
12
12
12
38
11
363
use Carp;
12
12
12
12
2520
74927
172
use File::Find::Rule;
13with 'MooseX::Getopt';
14
15has 'dbh' => ( is => 'ro', required=>1 );
16
17has 'catalog' => (
18    is => 'ro',
19    isa => 'Str',
20    default => '%',
21    documentation => q[might be required by some DBI drivers]
22);
23
24has 'schemata' => (
25    is => 'ro',
26    isa => 'ArrayRef[Str]',
27    default => sub { ['%'] },
28    documentation => q[List of schematas to include in checksum]
29);
30
31has 'tabletype' => (
32    is => 'ro',
33    isa => 'Str',
34    default => 'table',
35    documentation => q[Table type according to DBI->table_info]
36);
37
38has 'sqlsnippetdir' => (
39    isa => 'Str',
40    is => 'ro',
41    documentation => q[Directory containing sql update files],
42);
43
44# mainly needed for scripts
45has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
46has 'dry_run' => ( is => 'rw', isa => 'Bool', default => 0 );
47
48# internal
49has '_update_path' => ( is => 'rw', isa => 'HashRef', lazy_build=>1 );
50has '_schemadump' => (
51    isa=>'Str',
52    is=>'rw',
53    lazy_build=>1,
54    clearer=>'reset_checksum',
55);
56
57 - 110
=head1 NAME

DBIx::SchemaChecksum - Generate and compare checksums of database schematas

=head1 SYNOPSIS

    my $sc = DBIx::SchemaChecksum->new( dsn => 'dbi:Pg:name=foo' );
    print $sc->checksum;

=head1 DESCRIPTION

When you're dealing with several instances of the same database (eg.  
developer, testing, stage, production), it is crucial to make sure 
that all databases use the same schema. This can be quite a 
hair-pulling experience, and this module should help you keep your 
hair (if you're already bald, it won't make your hair grow back, 
sorry...)

DBIx::SchemaChecksum connects to your database, gets schema 
information (tables, columns, primary keys, foreign keys) and 
generates a SHA1 digest. This digest can then be used to easily verify schema consistency across different databases.

B<Caveat:> The same schema might produce different checksums on 
different database versions.

DBIx::SchemaChecksum works with PostgreSQL 8.3 and SQLite (but see 
below). I assume that thanks to the abstraction provided by the C<DBI> 
it works with most databases. If you try DBIx::SchemaChecksum with 
different database systems, I'd love to hear some feedback...

=head2 SQLite and column_info

DBD::SQLite doesn't really implement C<column_info>, which is needed 
to generate the checksum. We use the monkey-patch included in
http://rt.cpan.org/Public/Bug/Display.html?id=13631 
to make it work.

=head2 Scripts

Please take a look at the scripts included in this distribution:

=head3 schema_checksum.pl

Calculates the checksum and prints it to STDOUT

=head3 schema_update.pl

Updates a schema based on the current checksum and SQL snippet files 

=head1 METHODS 

=head2 Public Methods

=cut
111
112sub checksum {
113
22
18004
    my $self = shift;
114
22
598
    return Digest::SHA1::sha1_hex($self->_schemadump);
115}
116
117 - 124
=head3 schemadump

    my $schemadump = $self->schemadump;

Returns a string representation of the whole schema (as a Data::Dumper 
Dump).

=cut
125
126sub _build__schemadump {
127
13
20
    my $self = shift;
128
129
13
351
    my $tabletype = $self->tabletype;
130
13
351
    my $catalog = $self->catalog;
131
132
13
335
    my $dbh = $self->dbh;
133
134
13
43
    my @metadata = qw(COLUMN_NAME COLUMN_SIZE NULLABLE TYPE_NAME COLUMN_DEF);
135
136
13
25
    my %relevants = ();
137
13
13
12
362
    foreach my $schema ( @{ $self->schemata } ) {
138
13
246
        foreach
139            my $table ( $dbh->tables( $catalog, $schema, '%', $tabletype ) )
140        {
141
33
9287
            $table=~s/"//g;
142
33
30
            my %data;
143
144            # remove schema name from table
145
33
37
            my $t = $table;
146
33
84
            $t =~ s/^.*?\.//;
147
148
33
555
            my @pks = $dbh->primary_key( $catalog, $schema, $t );
149
33
33967
            $data{primary_keys} = \@pks if @pks;
150
151            # columns
152
33
564
            my $sth_col = $dbh->column_info( $catalog, $schema, $t, '%' );
153
154
33
25350
            my $column_info = $sth_col->fetchall_hashref('COLUMN_NAME');
155
156
33
7210
            while ( my ( $column, $data ) = each %$column_info ) {
157
75
375
63
454
                my $info = { map { $_ => $data->{$_} } @metadata };
158
159                # add postgres enums
160
75
128
                if ( $data->{pg_enum_values} ) {
161
0
0
                    $info->{pg_enum_values} = $data->{pg_enum_values};
162                }
163
164                # some cleanup
165
75
93
                if (my $default = $info->{COLUMN_DEF}) {
166
0
0
                    if ( $default =~ /nextval/ ) {
167
0
0
                        $default =~ m{'([\w\.\-_]+)'};
168
0
0
                        if ($1) {
169
0
0
                            my $new = $1;
170
0
0
                            $new =~ s/^\w+\.//;
171
0
0
                            $default = 'nextval:' . $new;
172                        }
173                    }
174
0
0
                    $default=~s/["'\(\)\[\]\{\}]//g;
175
0
0
                    $info->{COLUMN_DEF}=$default;
176                }
177
178
75
234
                $info->{TYPE_NAME} =~ s/^(?:.+\.)?(.+)$/$1/g;
179
180
75
221
                $data{columns}{$column} = $info;
181            }
182
183            # foreign keys
184
33
129
            my $sth_fk
185                = $dbh->foreign_key_info( '', '', '', $catalog, $schema, $t );
186
33
51
            if ($sth_fk) {
187
0
0
                $data{foreign_keys} = $sth_fk->fetchall_arrayref( {
188
0
0
                        map { $_ => 1 }
189                            qw(FK_NAME UK_NAME UK_COLUMN_NAME FK_TABLE_NAME FK_COLUMN_NAME UPDATE_RULE DELETE_RULE)
190                    }
191                );
192                # Nasty workaround
193
0
0
0
0
                foreach my $row (@{$data{foreign_keys}}) {
194
0
0
                    $row->{DEFERRABILITY} = undef;
195                }
196            }
197
198            # postgres unique constraints
199            # very crude hack to see if we're running postgres
200
33
92
            if ( $INC{'DBD/Pg.pm'} ) {
201
0
0
                my @unique;
202
0
0
                my $sth=$dbh->prepare( "select indexdef from pg_indexes where schemaname=? and tablename=?");
203
0
0
                $sth->execute($schema, $t);
204
0
0
                while (my ($index) =$sth->fetchrow_array) {
205
0
0
                    $index=~s/$schema\.//g;
206
0
0
                    push(@unique,$index);
207                }
208
0
0
                @unique = sort (@unique);
209
0
0
                $data{unique_keys} = \@unique if @unique;
210            }
211
212
33
915
            $relevants{$table} = \%data;
213        }
214
215    }
216
13
252
    my $dumper = Data::Dumper->new( [ \%relevants ] );
217
13
597
    $dumper->Sortkeys(1);
218
13
311
    $dumper->Indent(1);
219
13
344
    my $dump = $dumper->Dump;
220
13
2159
    return $dump;
221}
222
223 - 233
=head3 build_update_path

    my $update_info = $self->build_update_path( '/path/to/sql/snippets' )

Builds the datastructure needed by L<apply_sql_update>.
C<build_update_path> reads in all files ending in ".sql" in the
directory passed in (or defaulting to C<< $self->sqlsnippetdir >>). It 
builds something like a linked list of files, which are chained by 
their C<preSHA1sum> and C<postSHA1sum>.

=cut
234
235sub _build__update_path {
236
9
12
    my $self = shift;
237
9
196
    my $dir = $self->sqlsnippetdir;
238
9
18
    croak("Please specify sqlsnippetdir") unless $dir;
239
9
79
    croak("Cannot find sqlsnippetdir: $dir") unless -d $dir;
240
241
7
146
    say "Checking directory $dir for checksum_files" if $self->verbose;
242
243
7
8
    my %update_info;
244
7
132
    my @files = File::Find::Rule->file->name('*.sql')->in($dir);
245
246
7
5285
    foreach my $file ( sort @files ) {
247
18
2926
        my ( $pre, $post ) = $self->get_checksums_from_snippet($file);
248
249
18
24
        if ( !$pre && !$post ) {
250
0
0
            say "skipping $file (has no checksums)" if $self->verbose;
251
0
0
            next;
252        }
253
254
18
31
        if ( $pre eq $post ) {
255
3
7
            if ( $update_info{$pre} ) {
256
3
7
                my @new = ('SAME_CHECKSUM');
257
3
3
3
7
                foreach my $item ( @{ $update_info{$pre} } ) {
258
6
294
                    push( @new, $item ) unless $item eq 'SAME_CHECKSUM';
259                }
260
3
7
                $update_info{$pre} = \@new;
261            }
262            else {
263
0
0
                $update_info{$pre} = ['SAME_CHECKSUM'];
264            }
265        }
266
267
18
47
        if ( $update_info{$pre}
268            && $update_info{$pre}->[0] eq 'SAME_CHECKSUM' )
269        {
270
3
6
            if ( $post eq $pre ) {
271
3
3
4
41
                splice( @{ $update_info{$pre} },
272                    1, 0, Path::Class::File->new($file), $post );
273            }
274            else {
275
0
0
0
0
                push( @{ $update_info{$pre} },
276                    Path::Class::File->new($file), $post );
277            }
278        }
279        else {
280
15
248
            $update_info{$pre} = [ Path::Class::File->new($file), $post ];
281        }
282    }
283
284
7
1234
    return $self->_update_path( \%update_info ) if %update_info;
285
1
31
    return;
286}
287
288 - 301
=head3 get_checksums_from_snippet

    my ($pre, $post) = $self->get_checksums_from_snippet( $file );

Returns a list of the preSHA1sum and postSHA1sum for the given file.

The file has to contain this info in SQL comments, eg:

  -- preSHA1sum: 89049e457886a86886a4fdf1f905b69250a8236c
  -- postSHA1sum: d9a02517255045167053ea92dace728e1389f8ca

  alter table foo add column bar;

=cut
302
303sub get_checksums_from_snippet {
304
22
5904
    my ($self, $filename) = @_;
305
22
48
    die "need a filename" unless $filename;
306
307
21
16
    my %checksums;
308
309
21
284
    open( my $fh, "<", $filename ) || croak "Cannot read $filename: $!";
310
20
186
    while (<$fh>) {
311
114
252
        if (m/^--\s+(pre|post)SHA1sum:?\s+([0-9A-Fa-f]{40,})\s+$/) {
312
39
114
            $checksums{$1} = $2;
313        }
314    }
315
20
57
    close $fh;
316
20
40
25
152
    return map { $checksums{$_} || '' } qw(pre post);
317}
318
319 - 362
=head2 Attributes generated by Moose

All of these methods can also be set from the command line. See 
MooseX::Getopt.

=head3 dbh

The database handle (DBI::db). 

=head3 dsn

The dsn.

=head3 user

The user to use to connect to the DB.

=head3 password

The password to use to authenticate the user.

=head3 catalog

The database catalog searched for data. Not implemented by all DBs. See C<DBI::table_info>

Default C<%>.

=head3 schemata

An Arrayref containing names of schematas to include in checksum calculation. See C<DBI::table_info>

Default C<%>.

=head3 tabletype

What kind of tables to include in checksum calculation. See C<DBI::table_info>

Default C<table>.

=head3 verbose

Be verbose or not. Default: 0

=cut
363
364q{ Favourite record of the moment: The Dynamics - Version Excursions }
365