#!/usr/bin/env perl use strict; use warnings; use Git; use Git::FastExport; our $VERSION = 0.02; my %repo; # process command-line parameters while (@ARGV) { my ( $repo, $dir ) = split /:/, shift @ARGV, 2; $dir ||= ''; # create an export parser for each repo my $parser = Git::FastExport->new( Git->repository( Directory => $repo ) ); $parser->fast_export(qw( --progress=1 --all --date-order )); $parser->{mapdir} = $dir; # update the %repo hash $repo = $parser->{source}; $repo{$repo}{repo} = $repo; $repo{$repo}{dir} = $dir; $repo{$repo}{parser} = $parser; } # repositories that we will process my @repos = values %repo; my $mark = 1_000_000; # mark counter in the new rpo my %mark_map; # map marks in source repos to marks in the new repo # get the first commits $_->{commit} = next_commit( $_->{parser} ) for @repos; # main loop use Data::Dumper; my $last; my %commits; while (@repos) { # sort by date @repos = sort { $a->{commit}{date} <=> $b->{commit}{date} } @repos; my $repo = $repos[0]; # next commit to dump my $commit = $repo->{commit}; # update marks & dir in files for ( @{ $commit->{files} } ) { s/^M (\d+) :(\d+)/M $1 :$mark_map{$repo->{repo}}{$2}/; if ( my $dir = $repo->{dir} ) { s!^(M \d+ :\d+) (.*)!$1 $dir/$2!; # filemodify s!^D (.*)!D $dir/$1!; # filedelete # /!\ quotes may happen - die and fix if needed die "Choked on quoted paths in $repo->{repo}! Culprit:\n$_\n" if /^[CR] \S+ \S+ /; # filecopy | filerename s!^([CR]) (\S+) (\S+)!$1 $dir/$2 $dir/$3!; } } # first commit in the old repo linked to latest commit in new repo if ( $last && !$commit->{from} ) { $commit->{from} = ["from :$last"]; } # update historical information my ($id) = $commit->{mark}[0] =~ /:(\d+)/g; $last = $id; # last commit applied my $branch = ( split / /, $commit->{header} )[1]; my $node = $commits{$id} = { name => $id, repo => $repo->{repo}, branch => $branch, children => [], merge => exists $commit->{merge}, }; # this commit's parents my @parents = map {/:(\d+)/g} @{ $commit->{from} || [] }, @{ $commit->{merge} || [] }; # map each parent to its last "alien" commit my %parent_map = map { $_ => last_alien_child( $commits{$_}, $repo->{repo}, $branch )->{name} } @parents; # map parent marks for ( @{ $commit->{from} || [] }, @{ $commit->{merge} || [] } ) { if (m/^(from|merge) /) { s/:(\d+)/:$parent_map{$1}/g; } } # update the parents information for my $parent ( map { $parent_map{$_} } @parents ) { push @{ $commits{$parent}{children} }, $node; } # dump the commit print $commit->as_string; # load next commit $repo->{commit} = next_commit( $repo->{parser} ) or shift @repos; # no more blocks in this export } # return the next commit # - print out the intermediate blocks # - offset the old marks sub next_commit { my ($parser) = @_; my $block; while ( $block = $parser->next_block() ) { # map to the new mark for ( @{ $block->{mark} || [] } ) { s/:(\d+)/:$mark/ and $mark_map{ $parser->{source} }{$1} = $mark++; } # update marks in from & merge for ( @{ $block->{from} || [] }, @{ $block->{merge} || [] } ) { if (m/^(from|merge) /) { s/:(\d+)/:$mark_map{$parser->{source}}{$1}/g; } } last if $block->{type} eq 'commit'; print $block->as_string(); } return $block; } # find the last child of this node # that has either no child # or a child in our repo sub last_alien_child { my ( $node, $repo, $branch ) = @_; while (1) { # no children nodes return $node if ( !@{ $node->{children} } ); # some children nodes are local return $node if grep { $_->{repo} eq $repo } @{ $node->{children} }; # there's a child in the same branch if ( my ($peer) = grep { $_->{branch} eq $branch } @{ $node->{children} } ) { # but don't go past another repo's merges return $node if $peer->{merge}; $node = $peer; } # or pick the first child (as good as any) else { $node = $node->{children}[0]; } } } __END__ =head1 NAME git-stitch-repo - Stitch several git repositories into a git-fast-import stream =head1 SYNOPSIS git-stitch-repo repo1 repo2:dir2 ... =head1 DESCRIPTION B will process the output of C on the git repositories given on the command-line, and create a stream suitable for B that will create a new repository containing all the commits in a new commit tree that respects the history of all the source repositories. =head2 Example Imagine we have two repositories A and B that we want to stitch into a repository C so that all the files from A are in subdirectory F and all the files from B are in subdirectory F. Repository A: topic ' ,master A3---A5 / / A1---A2---A4 Branch I points to A5 and branch I points to A3. Repository B: ,topic ,master B3---B5---B7---B8 / / B1---B2---B4---B6 Branch I points to B8 and branch I points to B5. The C repository should preserve chronology, commit relationships and branches as much as possible, while giving the impression that the directories F & F did live side-by-side all the time. Assuming additional timestamps not shown on the above graphs, B will produce a B stream that will create the following history: ,topic ,master A3---B3---A5---B5---B7---B8 / / / A1---B1---A2---B2---A4---B4--------B6 Note that the current result is slightly buggy, since A5 wasn't on the I branch in the original graph for A. =head1 AUTHOR Philippe Bruhat (BooK), C<< >>. =head1 ACKNOWLEDGEMENTS The original version of this script was created as part of my work for BOOKING.COM, which authorized its publication/distribution under the same terms as Perl itself. =head1 COPYRIGHT Copyright 2008 Philippe Bruhat (BooK), All Rights Reserved. =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut