package Graph::ChuLiuEdmonds; use warnings; use strict; =head1 NAME Graph::ChuLiuEdmonds - Find minimum spanning trees in a directed graph. =head1 VERSION Version 0.02 =cut use Carp; our $VERSION = '0.02'; our $DEBUG; =head1 SYNOPSIS This module implements Chu-Liu-Edmonds L<[1]>,L<[2]> algorithm for finding a minimum spanning tree (MST) in a directed graph. use Graph; use Graph::ChuLiuEdmonds; my $graph = Graph::Directed->new(vertices=>[qw(a b c d)]); $graph->add_weighted_edges(qw(a b 3 c d 7 d a 2 d b 1 c a 2)); my $msts = $graph->MST_ChuLiuEdmonds($graph); ... =head1 EXPORT None. =head1 FUNCTIONS =head2 MST_ChuLiuEdmond my $msts = $graph->MST_ChuLiuEdmond(); Returns MSTs for a given directed graph. Minimum Spanning Trees or MSTs are directed tree subgraphs derived from a directed graph that "span the graph" (covering all the vertices) using as lightly weighted (hence the "minimum") edges as possible. =cut sub Graph::MST_ChuLiuEdmonds_no_copy { my ($graph)=@_; carp("graph not directed") unless $graph->is_directed; return _MST($graph); } =head2 MST_ChuLiuEdmonds_no_copy my $msts = $graph->MST_ChuLiuEdmond(); Like the method above, only avoiding deep-copying the graph; the method prunes $graph so as only the MSTs remain of it. =cut sub Graph::MST_ChuLiuEdmonds { my ($graph)=@_; carp("graph not directed") unless $graph->is_directed; return _MST($graph->deep_copy); } my $cycle_no=0; sub _MST { my ($g)=@_; my %in; # in the resulting (or partial) MST, this will map a vertex Y to the vertex X # in which the unique edge incoming to Y starts # i.e maps Y => X if X->Y is an edge of the resulting MST # phase 1: add best edges and contract cycles my @V = $g->vertices; my @C; my ($x,$y,$w,$e); print STDERR "Graph: $g\n" if $DEBUG; while (@V) { $y = shift @V; print STDERR "processing $y\n" if $DEBUG; my $best_w; for my $e ($g->edges_to($y)) { $w = $g->get_edge_weight( $e->[0], $y ); if (!defined($best_w) or $w<$best_w) { $best_w=$w; $x=$e->[0]; } } next unless defined $best_w; print STDERR "best $x-$y: $best_w\n" if $DEBUG; # we add the best incoming edge edge to $y $in{$y}=$x; # now we check it does not add a cycle to the MST: my @cycle_nodes=($y); do { unshift @cycle_nodes, $x; $x=$in{$x}; } while (defined($x) and $x ne $y); if (defined $x) { # the new edge made a cycle: # contract print STDERR "cycle: @cycle_nodes\n" if $DEBUG; my @cycle_weights = map { print STDERR " $_: $cycle_nodes[$_-1],$cycle_nodes[$_]\n" if $DEBUG; $g->get_edge_weight($cycle_nodes[$_-1],$cycle_nodes[$_]) } 0..$#cycle_nodes; print STDERR "cycle weights: @cycle_weights\n" if $DEBUG; my $cycle = 'CYCLE:'.($cycle_no++); push @V,$cycle; $g->add_vertex($cycle); # will represent the contracted @cycle_nodes my %in_cycle; @in_cycle{@cycle_nodes}=(); # for each vertex in which ends an edge starting on the cycle, # find the lightest edge to be preserved my %from=(); my %fromW=(); for $x (@cycle_nodes) { for my $e ($g->edges_from($x)) { $y=$e->[1]; next if exists $in_cycle{$y}; $w=$g->get_edge_weight($x,$y); if (!exists($fromW{$y}) or $w < $fromW{$y}) { $from{$y}=$x; $fromW{$y}=$w; } } } for $y (keys %from) { print STDERR "adding edge $cycle -> $y\n" if $DEBUG; $g->add_weighted_edge($cycle, $y, $fromW{$y}); } # Similarly for edges that end on the cycle. # For each such edge X->Y with Y on the cycle # we compute a weight as w(X->Y)+the weight of the arc # of the cycle starting at Y and ending on a node preceding Y # in the cycle. For a fixed X we find Y on the cycle # for which this computed weight is minimal. my %to; my %toW=(); my $i=0; my $C=0; $C+=$_ for @cycle_weights; # weight of the whole cycle for $y (@cycle_nodes) { for $e ($g->edges_to($y)) { $x=$e->[0]; next if exists $in_cycle{$x}; $w=$g->get_edge_weight($x,$y)+$C-$cycle_weights[$i]; if (!exists($toW{$x}) or $w < $toW{$x}) { $to{$x}=$y; $toW{$x}=$w; } } $i++; } for my $x (keys %to) { print STDERR "adding edge $x -> $cycle weight $toW{$x}\n" if $DEBUG; $g->add_weighted_edge($x, $cycle, $toW{$x}); } # delete the nodes of the @cycle_nodes $g->delete_vertices(@cycle_nodes); push @C,[$cycle,\@cycle_nodes,\@cycle_weights,\%to,\%from,\%toW,\%fromW]; } } # ok, now we have processed all nodes, including the nodes # representing the contracted cycles. # there is at most one incoming edge to # each node (and exactly one if there was # at least one in the original graph). # prune all edges that are not in the resulting (contracted) MST for $y ($g->vertices) { $x=$in{$y}; $g->delete_edges(map { @$_[0,1] } grep { $_->[0] ne $x } $g->edges_to($y)); } # phase 2: expand all cycles while (@C) { my $C = pop @C; my ($cycle,$cycle_nodes,$cycle_weights,$to,$from,$toW,$fromW)=@$C; print STDERR "$g\n" if $DEBUG; print STDERR "expanding: $cycle\n" if $DEBUG; $g->add_vertices(@$cycle_nodes); # fix incoming edge ($e) = $g->edges_to($cycle); # should now be at most one if ($e) { $x=$e->[0]; $y = $to->{$x}; $g->add_weighted_edge($x,$y,$toW->{$x}); for my $i (0..$#$cycle_nodes) { $g->add_weighted_edge($cycle_nodes->[$i-1],$cycle_nodes->[$i],$cycle_weights->[$i]) unless $cycle_nodes->[$i] eq $y; } } else { # the whole graph starts at this cycle # find the edge with the lowest weight and disconnect there my $max; my $max_i; # the worst edge on the cycle my $i = 0; for my $w (@$cycle_weights) { if (!defined($max) or $w>$max) { $max = $w; $max_i=$i; } $i++ } for $i (0..$#$cycle_nodes) { $g->add_weighted_edge($cycle_nodes->[$i-1],$cycle_nodes->[$i],$cycle_weights->[$i]) unless $i==$max_i; } } # fix outgoing edge for $e ($g->edges_from($cycle)) { $y = $e->[1]; $x = $from->{$y}; $g->add_weighted_edge($x,$y,$fromW->{$y}); } $g->delete_vertex($cycle); print STDERR "expanded: $cycle\n" if $DEBUG; } # all cycles expanded, we are done! print STDERR "MST: $g\n" if $DEBUG; return $g; } =head1 AUTHOR Petr Pajas, C<< >> =head1 CAVEATS The implementation was not tested on complex examples. Vertices cannot be perl objects (or references). The author did not attempt to compute the actual algorithmic complexity of this particular implementation. The algorithm implemented in this module returns the optimal MSTs. To obtain k-best MSTs, one could implement Camerini's algorithm L<[4]> (also described in [5]). =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Graph::ChuLiuEdmonds You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 SEE ALSO The implementation follows the algorithm published by Edmonds L<[1]> and independently Chu and Liu L<[2]>, as scatched in the 3rd section of L<[3]>. Note that possibly more efficient implementation is suggested in L<[3]>. =over 4 =item [1] J. Edmonds. 1967. Optimum branchings. Journal of Research of the National Bureau of Standards, 71B:233-240. =item [2] Y.J. Chu and T.H. Liu. 1965. On the shortest arborescence of a directed graph. Science Sinica, 14:1396-1400. =item [3] H. N. Gabow, Z. Galil, T. Spencer and R. E. Tarjan. 1986 Efficient algorithms for finding minimum spanning trees in undirected and directed graphs. Combinatorica 6 (2) 109-122 =item [4] Paolo M. Camerini, Luigi Fratta, and Francesco Maffioli. 1980. The k best spanning arborescences of a network. Networks, 10:91-110. =item [5] Keith Hall. 2007. k-best spanning tree parsing. In (To Appear) Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics. =back =head1 ACKNOWLEDGEMENTS The development of this module was supported by grant GA AV CR 1ET101120503. =head1 COPYRIGHT & LICENSE Copyright 2008 Petr Pajas, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of Graph::ChuLiuEdmonds