The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.



package DataCube::FileUtils::FileReader;

use strict;
use warnings;
use Time::HiRes;
use Fcntl;
use DataCube;
use DataCube::Schema;
use DataCube::FileUtils;
use Storable qw(nstore retrieve);

# new(%opts)
#
# Constructor. May be invoked on the class name or on an existing object
# (in which case the object's class is reused). The key/value pairs passed
# in are shallow-copied into a fresh hash that becomes the object.
sub new {
    my $invocant = shift;
    my %settings = @_;
    my $package  = ref($invocant) || $invocant;
    return bless { %settings }, $package;
}

# read($file)
#
# Opens the tab separated file $file for reading, consumes its first line
# as the header and records on the object:
#   fields  - hashref mapping column name  => zero-based column index
#   reverse - hashref mapping column index => column name
#   handle  - the open filehandle, now positioned after the header line
#   columns - index of the last header column (-1 when the header is empty)
#   nfields - number of header columns
# Returns the object itself.
# Dies if the file cannot be opened.
sub read {
    my($self,$file) = @_;
    open(my $F, '<' , $file) or die
        "DataCube::FileUtils::FileReader(read):\ncant open file:\n$file\n$!\n";
    my $fields  = <$F>;
    # An empty file yields no header line; treat that as an empty header
    # instead of passing undef to chomp/split.
    $fields = '' unless defined $fields;
    chomp($fields);
    my @fields  = split/\t/,$fields,-1;
    my %fields  = map { $fields[$_] => $_ } 0 .. $#fields;
    # Build index => name directly from the header so that every column
    # index has a name even when a header name is repeated (inverting
    # %fields would lose the earlier occurrences).
    my %reverse = map { $_ => $fields[$_] } 0 .. $#fields;
    $self->{fields}  = \%fields;
    $self->{handle}  = $F;
    $self->{reverse} = \%reverse;
    $self->{columns} = $#fields;
    $self->{nfields} = scalar @fields;
    return $self;
}

# nextrow_hashref()
#
# Reads the next non-blank line from the handle stored in $self->{handle}
# and returns it as a hashref keyed by column name (looked up through
# $self->{reverse}, index => name), covering indexes 0 .. $self->{columns}.
# Fields missing from a short row come back as undef.
# Returns nothing (undef / empty list) once the handle is exhausted.
sub nextrow_hashref {
    my($self) = @_;
    my $F     = $self->{handle};
    # Skip blank lines with a loop rather than recursion, so a long run of
    # empty lines cannot cause deep recursion.
    while (defined(my $line = <$F>)) {
        chomp($line);
        next unless length($line);
        my @line = split/\t/,$line,-1;
        my %data;
        $data{$self->{reverse}->{$_}} = $line[$_] for 0 .. $self->{columns};
        return \%data;
    }
    return;
}

# slurp($file)
#
# Reads the whole tab separated file $file into memory in one sysread and
# returns a list of hashrefs, one per data line, keyed by the names found
# on the first (header) line. Runs of newlines are treated as a single
# separator, so blank lines between rows are ignored. Fields missing from
# a short row are undef; fields beyond the header width are dropped.
# Returns an empty list when the file contains no lines.
# Dies if the file cannot be opened or cannot be read in full.
# The invocant is not used, so this works as a class or instance method.
sub slurp {
    my($self,$file) = @_;
    sysopen(my $F, $file, O_RDONLY)
        or die "DataCube::FileUtils::FileReader(slurp):\ncant sysopen:\n$file\n$!\n";
    my $size = -s($file);
    my $data = '';
    my $read = sysread($F, $data, $size);
    # sysread returns undef on error; report that instead of comparing
    # and interpolating an undefined value.
    my $got  = defined $read ? $read : 'undef';
    die "DataCube::FileUtils::FileReader(slurp | bytes):\nsysread return: $got bytes\nwanted to get:  $size bytes\n$!\n"
        unless defined $read and $size == $read;
    close $F;
    my @lines = split/\n+/,$data;
    $data = undef;
    return () unless @lines;
    my $head  = shift @lines;
    my @heads = split/\t/,$head,-1;
    my @results;
    # Iterate with for, not "while (shift)": a row consisting solely of
    # "0" is false and would otherwise end the loop early.
    for my $line (@lines) {
        my @line = split/\t/,$line,-1;
        my %row;
        @row{@heads} = @line[0 .. $#heads];
        push @results, \%row;
    }
    return @results;
}








1;









__END__


=head1 NAME

DataCube::FileUtils::FileReader - Read tab separated value files with header.

=head1 SYNOPSIS

    use strict;
    use warnings;
    use DataCube::FileUtils::FileReader;

    my $file   = 'some_flat_file.tsv';

    my $reader = DataCube::FileUtils::FileReader->new;

    $reader->read($file);

    while(my $row = $reader->nextrow_hashref){
        printf "\tcolumn_a  =>  %s\n",$row->{column_a};
        printf "\tcolumn_b  =>  %s\n",$row->{column_b};
        printf "\tcolumn_c  =>  %s\n",$row->{column_c};
        print '-' x 80,"\n";
    }




=head1 AUTHOR

David Williams, E<lt>david@namimedia.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2009 by David Williams

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.


=cut