package BioGraph::Clustering; # Auteur : Tristan Colombo # Librairie de traitement des graphes # Création : le 18/06/2004 # Modifications : le 28/06/2004 $VERSION="1.0.0"; =pod =head1 NAME BioGraph::Clustering =head1 SYNOPSIS use BioGraph::Clustering; =head1 DESCRIPTION Package for manipulate graphs represented as well as adjacent matrix or adjacent list. Library for clustering graphs. =head1 AVAILABLE FUNCTIONS This is the list of the differents functions implemented in this library. =cut use BioGraph::Compute; use BioGraph::Operation; sub ZD_kernel ##################################################### # Tristan Colombo (adaptation du C de A. Guenoche) # # Création : le 28/06/2004 # # Modifications : - # ##################################################### # Calcul des noyaux du graphe # # Input : Type du graphe : 1 = Matrice d'adjacence # # 2 = Liste d'adjacence # # Référence vers la table des densités # # Graphe # # Output : Nombre de sommets # ##################################################### { =pod =over 4 =item B Compute the kernels of the graph =over 6 =item SYNOPSIS %K=ZD_kernel(representation, ref_density, graph) =item PARAMETERS =over 8 =item I the type of representation choosen : 1 = adjacent matrix, and 2 = adjacent list =item I reference to the table of densities =item I the hash table of the graph =back =item OUTPUT The kernels (view as clusters) of the graph computed according to the definition of T. Colombo, A. Guenoche and Y. Quentin, "Research of density areas in a graph - Application to orthologous genes", Proceedings of the Fourth International Conference on Knowledge Discovery and Discrete Mathematics (2003). =back =back =cut my $type=shift; my $ref_density=shift; my %Ttmp=@_; my ($k, $k2, %Clust, @elts, %grpe, @succ); my ($n, $liste)=(1, " "); my %T=BioGraph::Operation::symetric($type, %Ttmp); my %D=BioGraph::Compute::degree($type, %T); my $DeMoy=BioGraph::Compute::mean(%$ref_density); foreach $k (keys %$ref_density) { if ($$ref_density{$k} >= $DeMoy) { $liste.="$k "; } } @elts=split(/\ /, $liste); # On détermine les clusters foreach $k (@elts) { next if ($k eq ""); if ($type==1) { @succ=keys %{$T{$k}}; } else { @succ=split(/\ /, $T{$k}); } foreach $k2 (@succ) { next if ($k2 eq ""); if (defined $grpe{$k2}) { $grpe{$k}=$grpe{$k2}; last; } } if (not defined $grpe{$k}) { $grpe{$k}=$n++; } } # Pour chaque sommet de degré 1 lié à un noyau on l'agglomère foreach $k (keys %D) { next if ($D{$k} != 1); if ($type==1) { $k2=keys %{$T{$k}}; } else { $k2=$T{$k}; $k2=~s/\ //g; } if ($liste=~/\ $k2\ /) { $grpe{$k}=$grpe{$k2}; $liste.="$k "; } } # Résultat final foreach $k (keys %grpe) { if (not defined $Clust{$grpe{$k}}) { $Clust{$grpe{$k}}=" $k "; } else { $Clust{$grpe{$k}}.="$k "; } } return(%Clust); } # fin de 'kernel' sub ZD_extension ##################################################### # Tristan Colombo # # Création : le 28/06/2004 # # Modifications : le 06/07/2004 # ##################################################### # Nombre de sommets d'un graphe # # Input : Type du graphe : 1 = Matrice d'adjacence # # 2 = Liste d'adjacence # # Graphe # # Output : Nombre de sommets # ##################################################### { =pod =over 4 =item B Compute the clusters of the graph =over 6 =item SYNOPSIS %C=ZD_extension(representation, ref_density, ref_kernel, graph) =item PARAMETERS =over 8 =item I the type of representation choosen : 1 = adjacent matrix, and 2 = adjacent list =item I reference to the table of densities =item I reference to the hash table of kernels =item I the hash table of the graph =back =item OUTPUT The clusters of the graph computed according to the definition of T. Colombo, A. Guenoche and Y. Quentin, "Research of density areas in a graph - Application to orthologous genes", Proceedings of the Fourth International Conference on Knowledge Discovery and Discrete Mathematics (2003). =back =back =cut my $type=shift; my $ref_density=shift; my $ref_noyaux=shift; my %Ttmp=@_; my %T=BioGraph::Operation::symetric($type, %Ttmp); my %Clust=%$ref_noyaux; my ($k, $k2, $k3, %connect, @succ, @succ2, %grpe, $max, $id_max); my ($liste, $garbage)=("", 0); foreach $k (keys %$ref_noyaux) { $liste.=$$ref_noyaux{$k}; @succ=split(/\ /, $$ref_noyaux{$k}); foreach $k2 (@succ) { next if ($k2 eq ""); $grpe{$k2}=$k; } } foreach $k (sort { $$ref_density{$b} <=> $$ref_density{$a} } keys %T) { next if ($liste =~ /\ $k\ /); %connect=(); if ($type==1) { @succ=keys %{$T{$k}}; } else { @succ=split(/\ /, $T{$k}); } foreach $k2 (@succ) { if (defined $grpe{$k2}) { if (defined $connect{$grpe{$k2}}) { $connect{$grpe{$k2}}++; } else { $connect{$grpe{$k2}}=1; } } } $max=0; $id_max=0; foreach $k2 (keys %connect) { if ($connect{$k2}>$max) { $max=$connect{$k2}; $id_max=$k2; } } if ($id_max != 0) { $Clust{$id_max}.="$k "; $grpe{$k}=$id_max; } else { $Clust{"Garbage"}.="$k "; $garbage++; } } # Traitement des éléments restant while ($garbage) { @succ=split(/\ /, $Clust{"Garbage"}); $Clust{"Garbage"}=" "; foreach $k2 (@succ) { next if ($k2 eq ""); # On traite $k2 ici et on l'accroche a sa famille ($garbage--) sinon garbage %connect=(); if ($type==1) { @succ2=keys %{$T{$k2}}; } else { @succ2=split(/\ /, $T{$k2}); } foreach $k3 (@succ2) { if (defined $grpe{$k3}) { if (defined $connect{$grpe{$k3}}) { $connect{$grpe{$k3}}++; } else { $connect{$grpe{$k3}}=1; } } } $max=0; $id_max=0; foreach $k3 (keys %connect) { if ($connect{$k3}>$max) { $max=$connect{$k3}; $id_max=$k3; } } if ($id_max != 0) { $Clust{$id_max}.="$k2 ";} $garbage--; } } delete($Clust{"Garbage"}); return(%Clust); } # fin de 'ZD_extension' =pod =head1 AUTHOR AND COPYRIGHT BioGraph::FileAccess is Copyright (C) 2004, Tristan Colombo CNRS - LCB, 31 chemin Joseph Aiguier 13009 Marseille France Email: tristan.colombo@ibsm.cnrs-mrs.fr All rights reserved. You may distribute this package under the terms of the GNU General Public License. =cut 1;