The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#! /usr/bin/perl -w
use strict;
use 5.010;
use # Hide from release warnings
Data::Dump 'dump';

# Structure of the data we're decoding...

open my $target_fh, '>', 'lib/Lingua/EN/Inflexion/Verbs.pm';
select $target_fh;

my $COMMENT_LINE_PAT = qr{ \A \s* \# }xms;
my $COMMENT_PAT      = qr{ \# .* }xms;
my $BLANK_LINE_PAT   = qr{ \A \s* $ }xms;
my $WS               = qr{ [\s]* }xms;
my $WORD_SEQ         = qr{ \S* (?: \s \S+)* }xms;
my $DATA_PAT         = qr{
    \A
        $WS
        ([*-])?+ ( $WORD_SEQ )      # 3rd person singular
        $WS
        ([*-])?+ ( $WORD_SEQ )      # 3rd person plural
        $WS
        ([*-])?+ ( $WORD_SEQ )      # Simple past (preterite)
        $WS
        ([*-])?+ ( $WORD_SEQ )      # Present continuous participle
        $WS
        ([*-])?+ ( $WORD_SEQ )      # Past participle
        $WS
        $COMMENT_PAT?               # Optional trailing comment
    \Z
}xms;

# The data structures we're building...
my %singular_of;
my %plural_of;
my %past_of;
my %pres_part_of;
my %past_part_of;

my %singular_word;
my %plural_word;
my %past_word;
my %pres_part_word;
my %past_part_word;

# Access verb data...
open my $verb_data_fh, '<', 'verbs.lei';

# Collect and categorize data...
INFLECTION:
while (my $line = readline $verb_data_fh) {

    # Skip noise...
    next if $line =~ $COMMENT_LINE_PAT
         || $line =~ $BLANK_LINE_PAT;

    # Extract data...
    my ($sing_gen, $sing, $plur_gen, $plur, $pret_gen, $pret, $pres_gen, $pres, $past_gen, $past)
        = $line =~ $DATA_PAT
            or die "Unknown input: $line";

    # Expand macros...
    for my $term ($sing, $plur, $pret, $pres, $past) {
        $term =~ s{\(CONS\)}{[^aeiou]}gxms;
        $term =~ s{\(VOWEL\)}{[^aeiou]}gxms;
        $term =~ s{\(VOWELY\)}{[^aeiouy]}gxms;
    }
    for my $gen ($sing_gen, $plur_gen, $pret_gen, $pres_gen, $past_gen) {
        $gen //= '';
        next if !$gen;
        $gen =~ s{-}{.+};
        $gen =~ s{\*}{.*};
    }

    # Build pattern tables...
    if ($sing_gen || $plur_gen || $pret_gen || $pres_gen || $past_gen) {
        # Remove generic markers and optional restriction...
        my $sing_restriction = $sing =~ s{ ( \[.*?\] )+ }{}xms ? $1 : q{};
        my $plur_restriction = $plur =~ s{ ( \[.*?\] )+ }{}xms ? $1 : q{};
        my $pret_restriction = $pret =~ s{ ( \[.*?\] )+ }{}xms ? $1 : q{};
        my $pres_restriction = $pres =~ s{ ( \[.*?\] )+ }{}xms ? $1 : q{};
        my $past_restriction = $past =~ s{ ( \[.*?\] )+ }{}xms ? $1 : q{};

        # Add the pattern data to the list of generics...
        push @{$plural_of{pattern}},    { is   => "($plur_gen$plur_restriction)$plur",
                                          from => "(.*$sing_restriction)$sing",
                                          to   => '${1}'.$plur
                                        };
        push @{$singular_of{pattern}},  { is   => "($sing_gen$sing_restriction)$sing",
                                          from => "(.*$plur_restriction)$plur",
                                          to   => '${1}'.$sing
                                        };
        if ($pret ne '_') {
            push @{$past_of{pattern}},  { is   => "($pret_gen$pret_restriction)$pret",
                                          from => "(.*$sing_restriction)$sing",
                                          to   => '${1}'.$pret
                                        };
            push @{$past_of{pattern}},  { from => "(.*$plur_restriction)$plur",
                                          to   => '${1}'.$pret
                                        };
        }
        if ($pres ne '_') {
            push @{$pres_part_of{pattern}}, { is   => "($pres_gen$pres_restriction)$pres",
                                              from => "(.*$sing_restriction)$sing",
                                              to   => '${1}'.$pres
                                            };
            push @{$pres_part_of{pattern}}, { from => "(.*$plur_restriction)$plur",
                                              to   => '${1}'.$pres
                                            };
        }
        if ($past ne '_') {
            push @{$past_part_of{pattern}}, { is   => "($past_gen$past_restriction)$past",
                                              from => "(.*$sing_restriction)$sing",
                                              to   => '${1}'.$past
                                            };
            push @{$past_part_of{pattern}}, { from => "(.*$plur_restriction)$plur",
                                              to   => '${1}'.$past
                                            };
        }
    }

    # Build literal tables (unless purely a generic rule)...
    if (!($sing_gen && $plur_gen && $pret_gen)) {
        # Build literal tables...
        $plural_of{literal}{$sing}   //= $plur;
        $singular_of{literal}{$plur} //= $sing;

        $singular_word{$sing} //= 1;
        $plural_word{$plur}   //= 1;

        my $pret_plur;
        if ($pret =~ /(.*?) [|] (.*)/xms) {
            ($pret, $pret_plur) = ($1, $2);
        }

        if ($pret ne '') {
            $past_word{$pret} //= 1;

            $past_of{literal}{$sing} //= $pret;
            $past_of{literal}{$past} //= $pret;
            $past_of{literal}{$pres} //= $pret;
            $past_of{literal}{$past} //= $pret;

            if (defined $pret_plur) {
                $past_of{literal}{$plur} //= $pret_plur;
                $past_word{$pret_plur}   //= 1;
            }
            else {
                $past_of{literal}{$plur} //= $pret;

            }
        }

        if ($pres ne '') {
            $pres_part_word{$pres} //= 1;

            $pres_part_of{literal}{$sing} //= $pres;
            $pres_part_of{literal}{$plur} //= $pres;
            $pres_part_of{literal}{$pret} //= $pres;
            $pres_part_of{literal}{$pres} //= $pres;
            $pres_part_of{literal}{$past} //= $pres;
        }

        if ($past ne '') {
            $past_part_word{$past} //= 1;

            $past_part_of{literal}{$sing} //= $past;
            $past_part_of{literal}{$plur} //= $past;
            $past_part_of{literal}{$pret} //= $past;
            $past_part_of{literal}{$pres} //= $past;
            $past_part_of{literal}{$past} //= $past;
        }

        # Allow hyphenated terms without the hyphens...
        if ($sing =~ s{-}{ }gxms) {
             $plur =~ s{-}{ }gxms;
             $pret =~ s{-}{ }gxms;
             $pres =~ s{-}{ }gxms;
             $past =~ s{-}{ }gxms;
             redo;
        }
    }
}

# Emit standard header...
say '##########################################';
say '## NOTE: This module was autogenerated. ##';
say '## Contains no user-servicable parts!!! ##';
say '##########################################';
say '';
say 'package Lingua::EN::Inflexion::Verbs;';
say 'use 5.010; use strict; use warnings;';
say 'no if $] >= 5.018, warnings => "experimental::smartmatch";';
say '';

my ($sec,$min,$hour,$day,$mon,$year) = localtime;
say 'our $VERSION = ', sprintf("%04d%02d%02d.%02d%02d%02d", $year+1900, $mon+1, $day, $hour, $min, $sec), ';';
say '';

# Emit tables...
say 'my $plural_of = ', dump($plural_of{literal}) . ';';
say '';
say 'my $singular_of = ', dump($singular_of{literal}) . ';';
say '';
say 'my $past_of = ', dump($past_of{literal}) . ';';
say '';
say 'my $pres_part_of = ', dump($pres_part_of{literal}) . ';';
say '';
say 'my $past_part_of = ', dump($past_part_of{literal}) . ';';
say '';
say 'my $is_plural = {}; @{$is_plural}{values %{$plural_of} } = ();';
say '                    @{$is_plural}{keys %{$singular_of} } = ();';
say '                    @{$is_plural}{values %{$past_of} } = ();';
say '                    @{$is_plural}{values %{$pres_part_of} } = ();';
say '                    @{$is_plural}{values %{$past_part_of} } = ();';
say '';
say 'my $is_singular = {}; @{$is_singular}{values %{$singular_of} } = ();';
say '                      @{$is_singular}{keys %{$plural_of} } = ();';
say '                      @{$is_singular}{values %{$past_of} } = ();';
say '                      @{$is_singular}{values %{$pres_part_of} } = ();';
say '                      @{$is_singular}{values %{$past_part_of} } = ();';
say '';
say 'my $is_past = {}; @{$is_past}{values %{$past_of} } = ();';
say '';
say 'my $is_past_part = {}; @{$is_past_part}{values %{$past_part_of} } = ();';
say '';
say 'my $is_pres_part = {}; @{$is_pres_part}{values %{$pres_part_of} } = ();';
say '';


# Emit pattern-based inflection subroutines...
emit_converter( 'plural',    $plural_of{pattern}    );
say '';
emit_converter( 'singular',  $singular_of{pattern}  );
say '';
emit_converter( 'past',      $past_of{pattern} );
say '';
emit_converter( 'pres_part', $pres_part_of{pattern} );
say '';
emit_converter( 'past_part', $past_part_of{pattern} );
say '';


# Emit pattern-based recognition subroutines...
emit_recognizer( 'plural',    'singular', @{$plural_of{pattern}} );
say '';
emit_recognizer( 'singular',  'plural',   @{$singular_of{pattern}}   );
say '';
emit_recognizer( 'past',       undef,     @{$past_of{pattern}}   );
say '';
emit_recognizer( 'pres_part',  undef,     @{$pres_part_of{pattern}}   );
say '';
emit_recognizer( 'past_part',  undef,     @{$past_part_of{pattern}}   );
say '';

# Emit stupid trailing package terminator...
say '1;';


# Generate test files...
{
    # 1. is_singular()

    open my $test_file_fh, '>', 't/verb_is_singular.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $singular (keys %singular_word) {
        say "ok verb(q{$singular})->is_singular  => q{is_singular: '$singular'};";
    }

    say '';
    say 'done_testing();';
}
{
    # 2. is_plural()

    open my $test_file_fh, '>', 't/verb_is_plural.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $plural (keys %plural_word) {
        say "ok verb(q{$plural})->is_plural  => q{is_plural: '$plural'};";
    }

    say '';
    say 'done_testing();';
}
{
    # 3. plural()

    open my $test_file_fh, '>', 't/verb_plural.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $sing (keys %{$plural_of{literal}}) {
        my $plural = $plural_of{literal}{$sing};

        say "is verb(q{$sing})->plural,   q{$plural}  => q{plural: '$sing' --> '$plural'};";
        say "is verb(q{$plural})->plural, q{$plural}  => q{plural: '$plural' --> '$plural'};"
            if !$singular_word{$plural};
    }

    say '';
    say 'done_testing();';
}
{
    # 4. singular()

    open my $test_file_fh, '>', 't/verb_singular.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $plural (keys %plural_word) {
        my $sing = $singular_of{literal}{$plural} // next;

        say "is verb(q{$plural})->singular, q{$sing}  => q{singular: '$plural' --> '$sing'};";
        say "is verb(q{$sing})->singular,   q{$sing}  => q{singular: '$sing' --> '$sing'};"
            if !$plural_word{$sing};
    }

    say '';
    say 'done_testing();';
}
{
    # 5. past()

    open my $test_file_fh, '>', 't/verb_past.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $verb (keys %{$past_of{literal}}) {
        my $past = $past_of{literal}{$verb};
        next if $verb eq q{} || $past eq '_';

        say "is verb(q{$verb})->past, q{$past}  => q{past '$verb' --> '$past'};";
    }

    say '';
    say 'done_testing();';
}
{
    # 6. pres_part()

    open my $test_file_fh, '>', 't/verb_pres_part.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $verb (keys %{$pres_part_of{literal}}) {
        my $pres_part = $pres_part_of{literal}{$verb};
        next if $verb eq q{} || $pres_part eq '_';

        say "is verb(q{$verb})->pres_part, q{$pres_part}  => q{pres_part '$verb' --> '$pres_part'};";
    }

    say '';
    say 'done_testing();';
}
{
    # 7. past_part()

    open my $test_file_fh, '>', 't/verb_past_part.t';
    select $test_file_fh;

    say 'use Test::More;';
    say 'use Lingua::EN::Inflexion;';
    say '';

    for my $verb (keys %{$past_part_of{literal}}) {
        my $past_part = $past_part_of{literal}{$verb};
        next if $verb eq q{} || $past_part eq '_';

        say "is verb(q{$verb})->past_part, q{$past_part}  => q{past_part '$verb' --> '$past_part'};";
    }

    say '';
    say 'done_testing();';
}

#====[ Utility subs ]=====================================

sub emit_converter {
    my ($name, $replacement_suffixes) = @_;

    my $type = $name;

    # Generate subroutine structure...
    say "sub convert_to_${name} {";
    say '    my ($word) = @_;';
    say "    return \$${name}_of->{\$word}    if exists \$${name}_of->{\$word};";
    say "    return \$${name}_of->{lc \$word} if exists \$${name}_of->{lc \$word};";
    say "    return \$word                    if is_$type(\$word);";
    say '    given ($word) {';

    # Generate each alternative replacement pattern...
    for my $replacement_ref ( @{$replacement_suffixes} ) {
        say '        when (m{' . $replacement_ref->{from} . '$}i) { return "' . $replacement_ref->{to} .'"; }';
    }

    # Subroutine defaults to failure...
    say '        default { return "_"; }';
    say '    }';
    say '}';
}

sub emit_recognizer {
    my ($name, $compl_name, @replacement_suffixes) = @_;

    # Generate subroutine structure...
    say "sub is_${name} {";
    say '    my ($word) = @_;';
    say "    return 1 if exists \$is_${name}\->{\$word};";
    say "    return 1 if exists \$is_${name}\->{lc \$word};";
    if ($compl_name) {
        say "    return 0 if exists \$is_${compl_name}\->{\$word};";
        say "    return 0 if exists \$is_${compl_name}\->{lc \$word};";
    }
    say '    given ($word) {';

    # Generate each alternative replacement pattern...
    for my $replacement_ref ( @replacement_suffixes ) {
        next if !$replacement_ref->{is};
        say '        when (m{\A' . $replacement_ref->{is} . '$}i) { return 1 }';
    }

    # Subroutine defaults to failure...
    if ($name eq 'singular') {
        say '        default { return !is_plural($word); }';
    }
    else {
        say '        default { return 0; }';
    }
    say '    }';
    say '}';
}