package Lingua::Align::Classifier::LibSVM;

# Classifier back-end that trains and applies an SVM through the
# Algorithm::SVM bindings.  Features arrive as name => value hashes and
# are mapped to integer IDs; the ID table is stored next to the model in
# "<model>.ids" so that the same mapping can be restored for
# classification.

use strict;
use warnings;

use FileHandle;
use IPC::Open3;
use Algorithm::SVM;
use Algorithm::SVM::DataSet;

our @ISA = qw( Lingua::Align::Classifier::Megam );


# new(%attr)
#
# Constructor.  Every key/value pair in %attr is copied verbatim into
# the object hash (e.g. -verbose).  Returns the blessed object.
sub new {
    my ($class, %attr) = @_;

    my $self = bless {}, $class;
    foreach my $key (keys %attr) {
        $self->{$key} = $attr{$key};
    }
    return $self;
}


# initialize_training()
#
# Creates a fresh C-SVC model with a radial kernel and an empty list of
# training instances.
sub initialize_training {
    my $self = shift;

    $self->{SVM} = Algorithm::SVM->new(
        Type   => 'C-SVC',
        Kernel => 'radial',
    );
    $self->{SVM_TRAINSET} = [];

    ## if we want to create a file with training data ....
    ## --------------------------------------------------------------
    # $self->{TRAINFILE} = $self->{-training_data} || '__train.'.$$;
    # $self->{TRAIN_FH} = new FileHandle;
    # $self->{TRAIN_FH}->open(">$self->{TRAINFILE}") ||
    #     die "cannot open training data file $self->{TRAINFILE}\n";
    # binmode($self->{TRAIN_FH}, ":utf8");

    return;
}


# add_train_instance($label, \%feat, $weight)
#
# Queues one training instance.
#   $label  - 0 is rewritten to '-1' and 1 to '+1'; any other value is
#             passed through unchanged
#   \%feat  - feature name => value; unseen names get the next free ID
#             (IDs start at 1, so slot 0 of the data vector stays unset)
#   $weight - not supported: a weight below 1 only triggers a warning on
#             STDERR, and no weight changes how often the instance is
#             added (the replication loop is disabled)
sub add_train_instance {
    my ($self, $label, $feat, $weight) = @_;

    $self->initialize_training() if not ref($self->{SVM});

    if ($label == 0) { $label = '-1'; }
    if ($label == 1) { $label = '+1'; }

    if (defined($weight) && $weight < 1) {
        print STDERR "weights are not supported!\n --> use weight=1!\n";
    }

    my @data = ();
    foreach my $name (keys %{$feat}) {
        if (!exists $self->{__FEATIDS__}->{$name}) {
            $self->{__FEATCOUNT__}++;
            $self->{__FEATIDS__}->{$name} = $self->{__FEATCOUNT__};
        }
        $data[ $self->{__FEATIDS__}->{$name} ] = $feat->{$name};
    }

    my $instance = Algorithm::SVM::DataSet->new(
        Label => $label,
        Data  => \@data,
    );

    # for (my $i=0;$i<$weight;$i++){
    push(@{ $self->{SVM_TRAINSET} }, $instance);
    # }

    ## print data instances to a file ....
    ##----------------------------------------
    # my $fh=$self->{TRAIN_FH};
    # print $fh $label;
    # foreach (0..$#data){
    #     if ($data[$_]){
    #         print $fh " $_:$data[$_]";
    #     }
    # }
    # print $fh "\n";

    return;
}


# train($model)
#
# Trains the SVM on all queued instances, saves it to $model (default
# '__svm.<pid>') and writes the feature ID table to "$model.ids"; the
# IDs are needed again when features are extracted for classification.
# With -verbose set, validate(5) is run on the training set and the
# resulting accuracy is printed to STDERR.
# Returns the model file name.
sub train {
    my $self  = shift;
    my $model = shift || '__svm.' . $$;

    $self->{SVM}->train(@{ $self->{SVM_TRAINSET} });

    # cross validation on training set
    if ($self->{-verbose}) {
        my $accuracy = $self->{SVM}->validate(5);
        print STDERR "accuracy = $accuracy\n";
    }

    $self->{SVM}->save($model);
    $self->save_feature_ids($model . '.ids', $self->{__FEATIDS__});

    return $model;
}


# save_feature_ids($file, \%feat)
#
# Writes the feature table to $file, one "<id>\t<feature name>" line per
# entry, ordered by ID so that the output is reproducible.
# Dies if the file cannot be opened or written.
sub save_feature_ids {
    my $self = shift;
    my ($file, $feat) = @_;

    $feat = {} unless defined $feat;

    # "or" (not "||") so that the check applies to open() itself and
    # not to the file name argument
    open(my $fh, '>', $file)
        or die "cannot open feature ID file $file: $!\n";

    my @names = sort { $feat->{$a} <=> $feat->{$b} } keys %{$feat};
    foreach my $name (@names) {
        print {$fh} "$feat->{$name}\t$name\n";
    }

    # buffered write errors only surface when the handle is closed
    close($fh)
        or die "cannot write feature ID file $file: $!\n";

    return;
}


# load_feature_ids($file, \%feat)
#
# Reads a table written by save_feature_ids and stores feature name =>
# id in %$feat.  Lines without a tab separator are skipped.
# Dies if the file cannot be opened.
sub load_feature_ids {
    my $self = shift;
    my ($file, $feat) = @_;

    open(my $fh, '<', $file)
        or die "cannot open feature ID file $file: $!\n";

    while (my $line = <$fh>) {
        chomp $line;

        # limit of 2: only the first tab separates the ID, so feature
        # names that contain tabs survive the round trip
        my ($id, $name) = split(/\t/, $line, 2);
        next unless defined $name;
        $feat->{$name} = $id;
    }
    close($fh);

    return;
}


# initialize_classification($model)
#
# Loads the SVM stored in $model, resets the feature table and refills
# it from "$model.ids".  Returns 1.
sub initialize_classification {
    my $self  = shift;
    my $model = shift;

    $self->{__FEATCOUNT__} = 0;
    $self->{__FEATIDS__}   = {};

    # previously tried settings:
    #     Kernel => 'radial',
    #     Type   => 'one-class',
    #     Gamma  => 64,
    #     C      => 8);

    $self->{SVM} = Algorithm::SVM->new(
        Model  => $model,
        C      => 2048,
        Gamma  => 0.125,
        Kernel => 'radial',
        Type   => 'C-SVC',
    );

    # Notes kept from the parameter search behind C and Gamma above:
    #
    # features = catpos:moses ???
    #
    # Best c=2048.0, g=0.125 CV rate=97.9978
    # Training...
    # Output model: __train.28910.model
    # svm_type c_svc
    # kernel_type rbf
    # gamma 0.125
    # nr_class 2
    # total_sv 389
    # rho 19.5711
    # label -1 1
    # nr_sv 213 176

    $self->{SVM_MODEL} = $model;
    $self->load_feature_ids($model . '.ids', $self->{__FEATIDS__});

    return 1;
}


# add_test_instance(\%feat, $label)
#
# Queues one instance for classify().  $label defaults to 0.  Features
# without an ID in the current feature table are skipped (and reported
# on STDERR when -verbose is set).
sub add_test_instance {
    my ($self, $feat, $label) = @_;
    $label ||= 0;

    if (not ref($self->{TEST_DATA})) {
        $self->{TEST_DATA}  = [];
        $self->{TEST_LABEL} = [];
    }

    my @data = ();
    foreach my $name (keys %{$feat}) {
        if (!exists $self->{__FEATIDS__}->{$name}) {
            if ($self->{-verbose}) {
                print STDERR "feature $name does not exist! ignore!\n";
            }

            # really ignore it: an undefined ID would otherwise be used
            # as index 0 of the data vector
            next;
        }
        $data[ $self->{__FEATIDS__}->{$name} ] = $feat->{$name};
    }

    my $instance = Algorithm::SVM::DataSet->new(
        Label => $label,
        Data  => \@data,
    );
    push(@{ $self->{TEST_DATA} },  $instance);
    push(@{ $self->{TEST_LABEL} }, $label);

    return;
}


# classify($model)
#
# Runs predict() on every queued test instance and then clears the
# queue.  $model defaults to '__svm.<pid>'; it is (re)loaded through
# initialize_classification when it differs from the model currently
# loaded.  Returns an empty list when no test instances are queued.
#
# NOTE(review): reloading the model here resets the feature table after
# the queued instances were already encoded - presumably callers
# initialize classification before adding test instances; confirm.
# NOTE(review): only predictions greater than 0 are returned, so the
# result list is not aligned with the queued instances - confirm that
# callers expect this.
sub classify {
    my $self  = shift;
    my $model = shift || '__svm.' . $$;

    return () if not ref($self->{TEST_DATA});

    if (!defined $self->{SVM_MODEL} || $self->{SVM_MODEL} ne $model) {
        $self->initialize_classification($model);
    }

    # run the SVM on every queued instance
    my @scores = ();
    foreach my $data (@{ $self->{TEST_DATA} }) {
        my $res = $self->{SVM}->predict($data);
        push(@scores, $res) if $res > 0;
    }

    delete $self->{TEST_DATA};
    delete $self->{TEST_LABEL};

    return @scores;
}

1;