bin/gonz_sample.pl - metacpan.org

#!/usr/bin/env perl

use strict;
use warnings;
use 5.010;

use Pod::Usage;
use Getopt::Long qw(:config auto_help);
use File::Spec::Functions;
use File::Slurp;

# Randomly subsample the lines arriving on STDIN: each line is written to
# STDOUT independently of the others, with probability --frac.
#
# Options:
#   --header, -h   the first line of STDIN is a header; it is read and
#                  dropped before sampling starts
#   --frac=F       fraction of lines to keep (floating point, required)
#
# Arguments:
#   FILE           must name an existing plain file
my %opt = ();
GetOptions( \%opt, 'header|h', 'frac=f' ) or pod2usage(2);

my $f = shift;

# Check for a missing argument first so that the file test and the message
# below never operate on undef.
pod2usage("missing file argument") unless defined $f;
pod2usage("$f is no file")         unless -f $f;

# NOTE(review): $f is only validated here; the data itself is read from
# STDIN. Confirm whether the script was meant to read from $f instead.

# Without --frac every line would be compared against undef, which warns on
# each line and silently produces no output; fail early with usage instead.
pod2usage("--frac is required") unless defined $opt{frac};
my $frac = $opt{frac};

# Consume the header line so that it does not take part in the sampling.
<STDIN> if $opt{header};

while ( my $line = <STDIN> ) {

    # rand() yields a value in [0, 1), so a strict '<' keeps a line with
    # probability exactly $frac: 0 keeps nothing, 1 keeps everything.
    # Lines are not chomped, so print (not say) preserves the line endings.
    print $line if rand() < $frac;
}

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browser

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)