package TM::Analysis; use TM; use Data::Dumper; use Class::Trait 'base'; use Class::Trait 'TM::Graph'; =pod =head1 NAME TM::Analysis - Topic Maps, analysis functions =head1 SYNOPSIS use TM::Materialized::AsTMa; my $tm = new TM::Materialized::AsTMa (file => 'test.atm'); $tm->sync_in; Class::Trait->apply ($tm, 'TM::Analysis'); print Dumper $tm->statistics; print Dumper $tm->orphanage; =head1 DESCRIPTION This package contains some topic map analysis functionality. =head1 INTERFACE =over =item B This (currently quite limited) function computes a reference to hash containing the following fields: =over =item C Nr of midlets in the map. This includes ALL midlets for topics and also those for assertions. =item C Nr of assertions in the map. =item C Nr of clusters according to the C function elsewhere in this document. =back =cut sub statistics { my $self = shift; my %s; # result foreach my $a (@_ ? @_ : qw(nr_midlets nr_maplets nr_clusters)) { # default is all $s{$a} = scalar $self->midlets if $a eq 'nr_midlets'; $s{$a} = scalar $self->match_forall if $a eq 'nr_maplets'; if ($a eq 'nr_clusters') { # clusters Class::Trait->apply ($self, 'TM::Graph'); # make sure we can do it my $clusters = $self->clusters (use_roles => 1, use_type => 1); $s{$a} = scalar @$clusters; } }; # size of map # payload (basenames, occurrence data, variant return \%s; } my %o; return \%o; =pod =item B This computes all topics which have either no supertype and also those which have no type. Without further parameters, it returns a hash reference with the following fields: =over =item C Holds a list reference to all topic ids which have no type. =item C Holds a list reference to all topic ids which have no instance. =item C Holds a list reference to all topic ids which have no superclass. =item C Holds a list reference to all topic ids which have no subclass. =back Optionally, a list of the identifiers above can be passed in so that only that particular information is actually returned (some speedup): my $o = TM::Analysis::orphanage ($tm, 'untyped'); =cut sub orphanage { my $self = shift; my %types = (); # each topic -> how many types my %instances = (); # each topic -> how many instances my %supers = (); # each topic -> how many superclasses my %subs = (); # each topic -> how many subclasses my ($ISA, $ISSC, $CLASS, $INSTANCE) = @{$self->{usual_suspects}}{'isa', 'is-subclass-of', 'class', 'instance'}; foreach my $a (values %{$self->{assertions}}) { $types{$a->[TM->LID]}++; $instances{$a->[TM->TYPE]}++; if ($a->[TM->TYPE] eq $ISA) { my ($class, $instance) = @{ $a->[TM->PLAYERS] }; $types{$instance}++; $instances{$class}++; } elsif ($a->[TM->TYPE] eq $ISSC) { my ($sub, $super) = @{ $a->[TM->PLAYERS] }; $supers{$sub}++; $subs{$super}++; } } #warn Dumper (\%types , \%instances, \%supers, \%subs); my @all = $self->midlets; my %o; foreach my $a (@_ ? @_ : qw(untyped empty unclassified unspecified)) { # default is all $o{$a} = [ grep !$types{$_}, @all ] if $a eq 'untyped'; $o{$a} = [ grep !$instances{$_}, @all ] if $a eq 'empty'; $o{$a} = [ grep !$supers{$_}, @all ] if $a eq 'unclassified'; $o{$a} = [ grep !$subs{$_}, @all ] if $a eq 'unspecified'; }; return \%o; } =pod =back =head1 SEE ALSO L =head1 COPYRIGHT AND LICENSE Copyright 200[3-6] by Robert Barta, Edrrho@cpan.orgE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut our $VERSION = 0.6; our $REVISION = '$Id: Analysis.pm,v 1.7 2007/07/28 16:41:13 rho Exp $'; 1; __END__ item B I<$hashref> = I<$tm>->info (I) returns some meta/statistical information about the map in form of a hash reference containing one or more of the following components (you might want to discover the return values with Data::Dumper): over item (a) I: this hash reference contains the number of topics, the number of associations, the UNIX date of the last modification and synchronisation with the external tied object and a list reference to other topic maps on which this particular map depends. item (b) I This hash reference contains a list (reference) of topic ids of topics I anywhere in the map. There is also a list (I) of topics which do not contain any baseName (yes this is allowed in section 3.6.1 of the standard). item (c) I This component contains a list reference I containing a list of topic identifiers of topics not defined in the map. item (d) I This component contains a hash reference to various statistics information, as the number of clusters, maximum and minimum size of clusters, number of topics defined and topics mentioned. TODOs: over item detect cyclic dependency of topic types back back You can control via a parameter in which information you are interested in: Example: $my_info = $tm->info ('informational', 'warning', 'errors', 'statistics'); cut sub info { my $self = shift; my @what = @_; my $info; my $usage; foreach my $w (@what) { if ($w eq 'informational') { $info->{$w} = { #address => $self, nr_topics => scalar @{$self->topics}, nr_assocs => scalar @{$self->associations}, last_mod => $self->{last_mod}, last_syncin => $self->{last_syncin}, depends => [ map { $_->{memory}->{id} } @{$self->{depends}} ], tieref => ref ($self->{tie}), id => $self->{memory} ? $self->{memory}->{id} : undef }; } elsif ($w eq 'warnings') { # figure out those topics which do not seem to have a single baseName $info->{$w}->{'no_baseName'} = []; foreach my $tid (@{$self->topics()}) { push @{$info->{$w}->{'no_baseName'}}, $tid unless $self->topic($tid)->baseNames && @{$self->topic($tid)->baseNames}; } $usage = $self->_usage() unless $usage; sub _usage { my $self = shift; my $usage; # figure out which topics are used as topicRef (scope, member, role, instanceOf) foreach my $tid (@{$self->topics()}) { # instanceOfs foreach my $i (@{$self->topic($tid)->instanceOfs}) { $usage->{as_instanceOf}->{$1}++ if $i->reference->href =~ /^\#(.+)/; $usage->{as_instance}->{$tid}++ unless $i->reference->href eq $XTM::PSI::xtm{topic}; } # scopes foreach my $b (@{$self->topic($tid)->baseNames}) { foreach my $s (@{$b->scope->references}) { if ($s->href =~ /^\#(.+)/) { $usage->{as_scope}->{$1}++; } } } foreach my $o (@{$self->topic($tid)->occurrences}) { if ($o->instanceOf->reference->href =~ /^\#(.+)/) { $usage->{as_instanceOf}->{$1}++; } foreach my $r (@{$o->scope->references}) { if ($r->href =~ /^\#(.+)/) { $usage->{as_scope}->{$1}++; } } } } foreach my $aid (@{$self->associations()}) { # instanceOfs if (my $i = $self->association($aid)->instanceOf) { if ($i->reference->href =~ /^\#(.+)/) { $usage->{as_instanceOf}->{$1}++; } } foreach my $m (@{$self->association($aid)->members}) { # roles if ($m->roleSpec) { $usage->{as_role}->{$1}++ if ($m->roleSpec->reference->href =~ /^\#(.+)/); } # members foreach my $r (@{$m->references}) { $usage->{as_member}->{$1}++ if ($r->href =~ /^\#(.+)/); } } } return $usage; } use Data::Dumper; ## print STDERR Dumper \%as_instanceOf, \%as_scope, \%as_member, \%as_role; ##print Dumper $usage; $info->{$w}->{'not_used'} = [ grep (! ( $usage->{as_instanceOf}->{$_} || $usage->{as_instance}->{$_} || $usage->{as_scope}->{$_} || $usage->{as_member}->{$_} || $usage->{as_role}->{$_}), @{$self->topics()}) ]; } elsif ($w eq 'errors') { $usage = $self->_usage() unless $usage; $info->{$w}->{'undefined_topics'} = [ grep (!$self->is_topic($_), (keys %{$usage->{as_instanceOf}}, keys %{$usage->{as_instance}}, keys %{$usage->{as_scope}}, keys %{$usage->{as_member}}, keys %{$usage->{as_role}}) ) ]; } elsif ($w eq 'statistics') { $usage = $self->_usage() unless $usage; #use Data::Dumper; #print STDERR Dumper ($usage); my $clusters = $self->clusters(); my ($tot, $min, $max) = (0, undef, 0); foreach my $c (keys %$clusters) { $tot += scalar @{$clusters->{$c}}; $min = $min ? ($min > scalar @{$clusters->{$c}} ? scalar @{$clusters->{$c}} : $min) : scalar @{$clusters->{$c}}; $max = $max < scalar @{$clusters->{$c}} ? scalar @{$clusters->{$c}} : $max; } $info->{$w} = { nr_topics_defined => scalar @{$self->topics}, nr_assocs => scalar @{$self->associations}, nr_clusters => scalar keys %$clusters, mean_topics_per_cluster => %$clusters ? 1.0 * $tot / scalar keys %$clusters : 1, # empty map => 1 cluster (do not argue with me here) max_topics_per_cluster => $max, min_topics_per_cluster => $min, nr_topics_mentioned => $tot, }; }; # ignore other directives } return $info; } =pod