package Algorithm::MCL; # ABSTRACT: perl module implementing Markov Cluster Algorithm using PDL =head1 SYNOPSIS use Algorithm::MCL; my $obj1 = new MyClass; my $ref2 = {}; my $ref3 = \"abc"; my $ref4 = \$val1; my $ref5 = []; my $mcl1 = Algorithm::MCL->new(); # create graph by adding edges $mcl1->addEdge($obj1, $ref2); $mcl1->addEdge($obj1, $ref3); $mcl1->addEdge($ref2, $ref3); $mcl1->addEdge($ref3, $ref4); $mcl1->addEdge($ref4, $ref5); # run MCL algorithm on created graph my $clusters1 = $mcl1->run(); # get clusters foreach my $cluster ( @$clusters1 ) { say "Cluster size: ". scalar @$cluster; } #################################### my $val1 = \"aaa"; my $val2 = \"bbb"; my $val3 = \"ccc"; my $val4 = \"ddd"; my $val5 = \"eee"; my $mcl2 = Algorithm::MCL->new(); $mcl2->addEdge($val1, $val2); $mcl2->addEdge($val1, $val3); $mcl2->addEdge($val2, $val3); $mcl2->addEdge($val3, $val4); $mcl2->addEdge($val4, $val5); my $clusters2 = $mcl2->run(); # get clusters foreach my $cluster ( @$clusters2 ) { say "Found Cluster"; foreach my $vertex ( @$cluster ) { say " Cluster element: ". $$vertex; } } =head1 DESCRIPTION This module is perl implementation of Markov Cluster Algorithm (MCL) based on Perl Data Language (PDL). MCL is algorithm of finding clusters of vertices in graph. More information about MCL can be found at L. There is also perl script implementing MCL - minimcl L. This module try to solve two problems: - easy integration MCL in perl scripts and modules. Algorithm::MCL accept references as input and every reference will be found later in some cluster. - performance and scale. Algorithm::MCL use Perl Data Language for most of its processing. That means Algorithm::MCL can run very fast on very big clusters. Main Algorithm::MCL procedures are written with "pdlpp". =head1 METHODS =method new() create new Algorithm::MCL object that accumulate graph edges and process data. =method addEdge($ref1, $ref2, $distance) add new edge to graph. first two parameters are reference to vertex objects. third parameter is "distance measurement" between vetices. "distance measurement" should be number between 0 and 1. if "distance measurement" is not defined it set to 1. =method run() apply MCL algorithm on graph. return reference to array that every element is reference to cluser array. =cut use PDL; use Inline 'Pdlpp'; use Mouse; no PDL::NiceSlice; BEGIN { $PDL::BIGPDL = 1; } has '_allVerts' => ( is => 'rw', isa => 'HashRef', default => sub { {} } ); has '_vectors' => ( is => 'rw', isa => 'HashRef', default => sub { {} } ); has '_orderedAllVerts' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); sub addEdge { my $self = shift; my ( $vertex1, $vertex2, $edgeWeight ) = @_; $self->addDirectEdge($vertex1, $vertex2, $edgeWeight); $self->addDirectEdge($vertex2, $vertex1, $edgeWeight); } sub addDirectEdge { my $self = shift; my ( $from, $to, $edgeWeight ) = @_; $edgeWeight ||= 1; unless ( $self->_allVerts->{$from} ) { push @{$self->_orderedAllVerts}, "$from"; $self->_allVerts->{$from} = $from; } unless ( $self->_allVerts->{$to} ) { push @{$self->_orderedAllVerts}, "$to"; $self->_allVerts->{$to} = $to; } my $vertexEdges = $self->_vectors->{ $from }; unless ( $vertexEdges ) { $vertexEdges = {}; $self->_vectors->{ $from } = $vertexEdges; } $vertexEdges->{ $to } = $edgeWeight; my $toVertexEdges = $self->_vectors->{ $to }; unless ( $toVertexEdges ) { $toVertexEdges = {}; $self->_vectors->{ $to } = $toVertexEdges; } unless (defined $toVertexEdges->{$from}) { $toVertexEdges->{$from} = 0; } } sub run { my $self = shift; my @verts = @{$self->_orderedAllVerts}; my $numOfVerts = scalar @verts; my $vertsOffsets = {}; for (my $kk=0; $kk < $numOfVerts; ++$kk) { $vertsOffsets->{ $verts[ $kk ] } = $kk; } my $matrix = zeros($numOfVerts, $numOfVerts); for (my $ii=0; $ii < $numOfVerts; ++$ii) { my $vector = $self->_vectors->{ $verts[ $ii ] }; for (my $jj=0; $jj < $numOfVerts; ++$jj) { if (exists $vector->{ $verts[ $jj ] }) { $matrix->set( $ii, $jj, $vector->{ $verts[ $jj ] } ); } } } $matrix->inplace->addLoops; $matrix->inplace->makeStochastic; my $resultMatrix = $self->mcl( $matrix ); my $clusters = $self->extractClusters($resultMatrix, $numOfVerts); return $clusters; } sub mcl { my $self = shift; my ( $matrix ) = @_; my $chaos = 1; while ($chaos > 0.0001) { my $mx = $matrix x $matrix; my $cList = $mx->inplace->inflate; $chaos = $cList->max; $matrix = $mx; } $matrix->inplace->cleanSmall; return $matrix; } sub extractClusters { my $self = shift; my ( $resultMatrix, $numOfVerts ) = @_; my $clIdxs = $resultMatrix->getClustersIndex; my $clusters = []; for ( my $ii=0; $ii < $numOfVerts; ++$ii ) { if ($clIdxs->at( $ii )) { my $cluster = []; push @$cluster, $self->getIdxObj( $ii ); for ( my $jj=0; $jj < $numOfVerts; ++$jj ) { if ($resultMatrix->at( $ii, $jj )) { if ( $ii != $jj ) { push @$cluster, $self->getIdxObj( $jj ); $clIdxs->set( $jj, 0 ); } } } push @$clusters, $cluster } } return $clusters; } sub getIdxObj { my $self = shift; my ( $idx ) = @_; return$self->_allVerts->{$self->_orderedAllVerts->[$idx]}; } __PACKAGE__->meta->make_immutable(); __DATA__ __Pdlpp__ pp_def('getClustersIndex', Pars => 'a(m, n); [o]idxs(m);', Code => q{ int ii; int m_size = $SIZE(m); for (ii=0; iiii, n=>ii) > 0) { clFlag = 1; } $idxs(m=>ii) = clFlag; } } ); pp_def('addLoops', Pars => 'a(m, n);', Code => q{ int ii; int m_size = $SIZE(m); int n_size = $SIZE(n); for (ii=0; iiii,n=>jj) > vmax) { vmax = $a(m=>ii, n=>jj); } } if ( vmax == 0 ) { vmax = 1; } $a(m=>ii, n=>ii) = vmax; } } ); pp_def('inflate', Pars => 'a(n); [o]chaos();', Code => q{ double vsum; vsum = 0; double sumsq; sumsq = 0; double vmax; vmax = 0; loop(n) %{ if ($a() != 0) { if ($a() < 0.00001) { $a() = 0; } else { $a() = $a() * $a(); vsum += $a(); } } %} if (vsum > 0) { loop(n) %{ $a() = $a() / vsum; sumsq = sumsq + $a() * $a(); if ($a() > vmax) { vmax = $a(); } %} } $chaos() = vmax - sumsq; } ); pp_def('makeStochastic', Pars => 'a(n);', Code => q{ double vsum; vsum = 0; loop(n) %{ if ($a() != 0) { if ($a() < 0.00001) { $a() = 0; } else { vsum += $a(); } } %} if (vsum > 0) { loop(n) %{ $a() = $a() / vsum; %} } } ); pp_def('cleanSmall', Pars => 'a(n);', Code => q{ loop(n) %{ if ($a() < 0.1) { $a() = 0; } %} } ); pp_done();