# Module configuration for Russian pre-processing: POS tagging with hunpos
# followed by dependency parsing with malt (see 'submodules' below).
# NOTE(review): comments assume this file is loaded by evaluating it as Perl
# -- confirm against the config loader.
{
  'module' => {
    'name' => 'Russian pre-processing',
    # 'submodules' and 'submodule names' appear to be parallel lists
    # (same length, same order) -- keep them in sync when editing.
    'submodules' => [
      'pre/ru/tagHunPos',
      'pre/ru/malt',
    ],
    'submodule names' => [
      'POS tagger (hunpos)',
      'dependency parser (malt)',
    ],
    'stdout' => 'text',
  },
  # Fixed: the description previously described the Italian module
  # (TreeTagger etc.); it now reflects the sub-modules listed above.
  'description' => 'This is the pre-processing module for Russian corpora. It includes the HunPos POS tagger and the MaltParser dependency parser for Russian.',
  'input' => {
    'text' => {
      'format' => 'xml',
      'root' => 's',
    }
  },
  'output' => {
    'text' => {
      'format' => 'xml',
      'root' => 's',
      'write_mode' => 'overwrite',
      'status' => 'tag'
    }
  },
  'arguments' => {
    # Short option names; values look like colon-separated paths into
    # this configuration (e.g. input -> text -> file) -- TODO confirm.
    'shortcuts' => {
      'in' => 'input:text:file',
      'out' => 'output:text:file',
      'ci' => 'input:text:encoding',
      'co' => 'output:text:encoding',
    }
  },
  'widgets' => {
    'input' => {
      'text' => {
        # Fixed: was language=it (copied from the Italian module).
        'stream name' => 'stream(format=text,language=ru)'
      },
    },
  }
}