#!perl
use strict;
use warnings;
use Data::Dumper;
use Carp;

#
# This is a SAS Component
#

=head1 subsystems_to_spreadsheets

The subsystems_to_spreadsheets command allows the user to output the entire
spreadsheet for a set of input subsystems. The input is a table with a column
containing subsystem names. The output is a table with 4 appended columns
[Genome,variant-code,role,fid]

Example:

    subsystems_to_spreadsheets [arguments] < input > output

The standard input should be a tab-separated table (i.e., each line
is a tab-separated set of fields).  Normally, the last field in each
line would contain the subsystem name. If another column contains the
subsystem use

    -c N

where N is the column (from 1) that contains the subsystem.

This is a pipe command. The input is taken from the standard input, and the
output is to the standard output.

=head2 Documentation for underlying call

This script is a wrapper for the CDMI-API call subsystems_to_spreadsheets.
It is documented as follows:

  $return = $obj->subsystems_to_spreadsheets($subsystems, $genomes)

=over 4

=item Parameter and return types

=begin html

$subsystems is a subsystems
$genomes is a genomes
$return is a reference to a hash where the key is a subsystem and the value is a reference to a hash where the key is a genome and the value is a row
subsystems is a reference to a list where each element is a subsystem
subsystem is a string
genomes is a reference to a list where each element is a genome
genome is a string
row is a reference to a list containing 2 items:
    0: a variant
    1: a reference to a hash where the key is a role and the value is a fids
variant is a string
role is a string
fids is a reference to a list where each element is a fid
fid is a string

=end html

=begin text

$subsystems is a subsystems
$genomes is a genomes
$return is a reference to a hash where the key is a subsystem and the value is a reference to a hash where the key is a genome and the value is a row
subsystems is a reference to a list where each element is a subsystem
subsystem is a string
genomes is a reference to a list where each element is a genome
genome is a string
row is a reference to a list containing 2 items:
    0: a variant
    1: a reference to a hash where the key is a role and the value is a fids
variant is a string
role is a string
fids is a reference to a list where each element is a fid
fid is a string

=end text

=back

=head2 Command-Line Options

=over 4

=item -c Column

This is used only if the column containing the subsystem is not the last column.

=item -i InputFile    [ use InputFile, rather than stdin ]

=item g1 g2 g3 ...

Any remaining command-line arguments are passed to the underlying call as
its list of genomes.

=back

=head2 Output Format

The standard output is a tab-delimited file. It consists of the input
file with 4 extra columns added (Genome,Variant,Role,Fid).

Input lines that cannot be extended are written to stderr.

=cut

my $usage = "usage: subsystems_to_spreadsheets [-c column] [-i InputFile] [g1 g2 g3 ...] < input > output";

use Bio::KBase::CDMI::CDMIClient;
use Bio::KBase::Utilities::ScriptThing;

# Build the extended output rows for one input line.
#
#   $line         - the original input line (without trailing newline)
#   $row_of       - hash ref: genome => [ variant, { role => [ fid, ... ] } ]
#
# Returns a list of tab-joined strings, one per (genome, role, fid)
# combination, ordered by genome, then role, then fid. The list is empty
# when the subsystem has no genomes, roles, or fids to report.
sub _spreadsheet_rows {
    my ($line, $row_of) = @_;

    my @rows;
    for my $genome (sort keys %$row_of) {
        my ($variant, $fids_of) = @{ $row_of->{$genome} };

        # An undefined variant prints as an empty field either way; the
        # guard only keeps "uninitialized" warnings off stderr, which this
        # script uses for rejected input lines.
        $variant = '' unless defined $variant;

        for my $role (sort keys %$fids_of) {
            for my $fid (sort @{ $fids_of->{$role} }) {
                push @rows, join("\t", $line, $genome, $variant, $role, $fid);
            }
        }
    }
    return @rows;
}

my $column;
my $input_file;

my $kbO = Bio::KBase::CDMI::CDMIClient->new_for_script('c=i' => \$column,
                                                       'i=s' => \$input_file);
if (! $kbO) {
    # No client object was returned: report usage and signal failure to
    # the calling shell with a non-zero status.
    print STDERR "$usage\n";
    exit 1;
}

# Read from the file given with -i, otherwise from standard input.
my $ih;
if ($input_file) {
    open $ih, "<", $input_file or die "Cannot open input file $input_file: $!";
}
else {
    $ih = \*STDIN;
}

# Each tuple from GetBatch is used as [ subsystem name, original line ].
while (my @tuples = Bio::KBase::Utilities::ScriptThing::GetBatch($ih, undef, $column)) {
    my @subsystems   = map { $_->[0] } @tuples;
    my $spreadsheets = $kbO->subsystems_to_spreadsheets(\@subsystems, \@ARGV);

    for my $tuple (@tuples) {
        my ($subsys, $line) = @$tuple;
        my $row_of = $spreadsheets->{$subsys};

        my @rows = $row_of ? _spreadsheet_rows($line, $row_of) : ();
        if (@rows) {
            print "$_\n" for @rows;
        }
        else {
            # As documented: a line that cannot be extended (unknown
            # subsystem, or a subsystem with nothing to report) goes to
            # stderr instead of being dropped silently.
            print STDERR $line, "\n";
        }
    }
}

close $ih if $input_file;