#!/usr/bin/perl use strict; use vars qw($VERSION); $VERSION = "0.1"; =pod =head1 NAME kif2atm - KIF to AsTMa= converter =head1 SYNOPSIS cat ontology.kif | kif2atm ... > ontology.atm =head1 DESCRIPTION This program reads the taxonomy definitions provided in KIF and returns a topic map in AsTMa= format. http://astma.bond.edu.au/ All concepts and their subclass-superclass relations will be output in topic map form on STDOUT. All other information (first order logic rules) is currently ignored. All conversion problems and warnings will go to STDERR. =head1 OPTIONS =over =item B ...does hopefully what you would expect. =cut my $help; =pod =item B Writes out warnings onto STDERR. =cut my $warn; =pod =item B Writes out statistical information at the end of the run to STDERR. =cut my $stats; =pod =item B (number) If provided, limits the number of processed KIF sentences. =cut my $limit; =pod =item B (number) If provided all lines will be skipped in the KIF file and parsing will begin at this line number. =cut my $start_line_nr; =pod =back =head1 KIF Notation The full notation is documented at L =head1 AsTMa (Asymptotic Topic Map), the Notation Please refer to the online documentation L =head1 AUTHOR INFORMATION Copyright 200[4], Robert Barta , All rights reserved. =cut use Data::Dumper; use Getopt::Long; if (!GetOptions ('help|?|man' => \$help, 'warn' => \$warn, 'stats' => \$stats, 'start:i' => \$start_line_nr, 'limit:i' => \$limit, ) || $help) { use Pod::Usage; pod2usage(-exitstatus => 0, -verbose => 2); } my %stats = (nr_sent => 0, nr_sent_ign => 0, nr_sent_unh => 0); # for statistics #-- this is necessary as some concepts have non-id characters in it my %trans = ('=>' => 'sumo-implies', '<=>' => 'sumo-equiv', '?THING' => 'Thing'); sub _cleanse { my $t = shift; return $trans{$t} ? $trans{$t} : $t; } # here we collect our information my %topics; # all information about one topic my @topics; # just to know the sequence (makes it easier to compare with original KIF my @inverses; use TM::Ontology::KIF; my $kif = new TM::Ontology::KIF (defined $start_line_nr ? (start_line_nr => $start_line_nr) : (), defined $limit ? (sentence_limit => $limit) : (), sentence => sub { my $s = shift; my $t = _cleanse ($s->[1]->[0]); $stats{nr_sent}++; warn "sentence: $stats{nr_sent} ($t)" if $warn; $topics{$t}->{name} = '=>' if $t eq 'sumo-implies'; $topics{$t}->{name} = '<=>' if $t eq 'sumo-equiv'; if ($s->[0] eq 'instance') { push @topics, $t unless $topics{$t}; push @{$topics{$t}->{'is-a'}}, _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'subAttribute') { push @topics, $t unless $topics{$t}; push @{$topics{$t}->{subclasses}}, _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'contraryAttribute') { push @topics, $t unless $topics{$t}; $topics{$t}->{contrary} = _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'exhaustiveAttribute') { push @topics, $t unless $topics{$t}; $topics{$t}->{exhaustive} = $s->[1]; } elsif ($s->[0] eq 'disjointDecomposition') { push @topics, $t unless $topics{$t}; $topics{$t}->{'disjointDecomposition'} = $s->[1]; } elsif ($s->[0] eq 'disjoint') { push @topics, $t unless $topics{$t}; $topics{$t}->{'disjoint'} = _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'inverse') { push @topics, $t unless $topics{$t}; $topics{$t}->{'inverse'} = _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'domain') { push @topics, $t unless $topics{$t}; do { warn "cannot handle expressions as domains"; return; } if ref ($s->[1]->[2]); push @{$topics{$t}->{'domains'}}, _cleanse ($s->[1]->[2]); # push @{$topics{$t}->{'domains'}}, ref($s->[1]->[2]) eq 'ARRAY' ? @{$s->[1]->[2]} : _cleanse ($s->[1]->[2]); } elsif ($s->[0] eq 'range') { push @topics, $t unless $topics{$t}; $topics{$t}->{'range'} = _cleanse ($s->[1]->[2]); } elsif ($s->[0] eq 'documentation') { push @topics, $t unless $topics{$t}; $topics{$t}->{'documentation'} = $s->[1]->[1]; } elsif ($s->[0] eq 'rangeSubclass') { push @topics, $t unless $topics{$t}; push @{$topics{$t}->{subclasses}}, _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'subrelation') { push @topics, $t unless $topics{$t}; push @{$topics{$t}->{subclasses}}, _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'subclass') { push @topics, $t unless $topics{$t}; push @{$topics{$t}->{subclasses}}, _cleanse ($s->[1]->[1]); } elsif ($s->[0] eq 'disjointRelation') { warn "ignored $s->[0]" if $warn; $stats{nr_sent_ign}++; } elsif (grep ($s->[0] eq $_, qw(trichotomizingOn initialList equal exists relatedInternalConcept domainSubclass partition => <=> and or not identityElement))) { warn "ignored $s->[0]" if $warn; $stats{nr_sent_ign}++; } else { use Data::Dumper; warn "unhandled sentence: ". Data::Dumper::Dumper ($s) if $warn; $stats{nr_sent_unh}++; } }, ); use IO::Handle; my $input = new IO::Handle; $input->fdopen(fileno(STDIN),"r") || die "Cannot process STDIN"; eval { $kif->parse ($input); }; warn $@ if $@; $input->close; use POSIX qw(strftime); print "#-- # # SUMO (Suggested Upper Merged Ontology) # # Converted to Topic Maps by kif2atm ($VERSION) # # ".(strftime "%a %b %e %H:%M:%S %Y", localtime)." "; foreach my $t (@topics) { print " #-- $t ".($topics{$t}->{'is-a'} ? "(".join (" ", @{$topics{$t}->{'is-a'}}).")" : ''); print " bn: "._clean ($topics{$t}->{'name'}) if $topics{$t}->{'name'}; print " in: "._clean ($topics{$t}->{'documentation'}) if $topics{$t}->{'documentation'}; foreach my $s (@{$topics{$t}->{'subclasses'}}) { print " (is-subclass-of) superclass : $s subclass : $t "; $stats{nr_assocs}++; } if ($topics{$t}->{'domains'}) { if ($topics{$t}->{'range'}) { # oh, a function print " (function-has-domains) function : $t range : $topics{$t}->{'range'} domain : ".join (" ", @{$topics{$t}->{'domains'}})." "; } else { # a relation print " (relation-has-domains) relation : $t domain : ".join (" ", @{$topics{$t}->{'domains'}})." "; } $stats{nr_assocs}++; } if ($topics{$t}->{'inverse'}) { print " (are-inverse) relations : $t $topics{$t}->{'inverse'} "; $stats{nr_assocs}++; } if ($topics{$t}->{'disjointDecomposition'}) { shift @{$topics{$t}->{'disjointDecomposition'}}; # we do not need the topic itself print " (is-disjointly-decomposed) whole : $t component : ".join (" ", @{$topics{$t}->{'disjointDecomposition'}})." "; $stats{nr_assocs}++; } if ($topics{$t}->{'disjoint'}) { print " (are-disjoint) objects : $t $topics{$t}->{'disjoint'} "; $stats{nr_assocs}++; } if ($topics{$t}->{'contrary'}) { print " (are-contrary-attributes) attributes : $t $topics{$t}->{'contrary'} "; $stats{nr_assocs}++; } if ($topics{$t}->{'exhaustive'}) { shift @{$topics{$t}->{'exhaustive'}}; # we do not need the topic itself print " (are-exhaustive-attributes-for) attribute : $t values : ".join (" ", @{$topics{$t}->{'exhaustive'}})." "; $stats{nr_assocs}++; } } if ($stats) { print STDERR " --- Nr of sentences processed: $stats{nr_sent} ($stats{nr_sent_ign} ignored, $stats{nr_sent_unh} not handled) Nr of topics: ".@topics." Nr of associations: $stats{nr_assocs} "; } sub _clean { my $s = shift; $s =~ s/\n/\\\n/g; $s =~ s/^"//; $s =~ s/"$//; $s =~ s/&%//g; return $s; } __END__