#! /usr/bin/perl -w
#
# Perl Power Tools - cut
# Author: Greg Hewgill greg@hewgill.com 1999-03-03

use strict;
use warnings;

# Run as a program only when executed directly; when the file is loaded
# with require/do the helpers below can be called (and tested) on their own.
main() unless caller;

# main()
#
# Parses the command line in @ARGV, then copies the selected bytes,
# characters or fields of every input line (files named in @ARGV, or the
# standard input) to the standard output.  Any usage error is reported on
# STDERR and terminates the program through usage_exit(1).
sub main {
    my $type     = '';    # list type seen so far: 'b', 'c' or 'f'
    my $list;             # the list argument belonging to $type
    my $delim;            # argument of -d, if any
    my $suppress = 0;     # true when -s was given

    while (@ARGV) {
        my $arg = $ARGV[0];
        last if $arg eq '-';         # a single - means stdin
        last unless $arg =~ /^-/;    # doesn't start with -, so we are into filenames
        shift @ARGV;

        if ($arg =~ /^-([bcf])(.*)/) {
            my ($kind, $value) = ($1, $2);
            if ($type) {
                print STDERR "$0: only one type of list may be specified\n";
                usage_exit(1);
            }
            # Test the length, not the truth, of the attached value so
            # that a value of "0" is not mistaken for a missing one.
            $list = length $value ? $value : shift @ARGV;
            $type = $kind;
        }
        elsif ($arg =~ /^-d(.*)/) {
            my $value = $1;
            $delim = length $value ? $value : shift @ARGV;    # -d0 is a valid delimiter
        }
        elsif ($arg =~ /^-n/) {
            # -n ignored
        }
        elsif ($arg =~ /^-s/) {
            $suppress = 1;
        }
        elsif ($arg =~ /^--$/) {
            last;    # allow -- to terminate option processing
        }
        else {
            print STDERR "$0: invalid option $arg\n";
            usage_exit(1);
        }
    }

    unless ($type) {
        print STDERR "$0: you must specify a list of bytes, characters, or fields\n";
        usage_exit(1);
    }

    my $have_delim = defined $delim && length $delim;
    if ($have_delim && $type ne 'f') {
        print STDERR "$0: a delimiter may only be specified when operating on fields\n";
        usage_exit(1);
    }
    # Only the first character of the -d argument is the delimiter, as
    # documented; tab is the default delimiter.
    $delim = $have_delim ? substr($delim, 0, 1) : "\t";

    my @ranges = parse_range($list);

    while (my $line = <>) {
        chomp $line;
        my $out = $type eq 'f'
            ? cut_fields($line, $delim, \@ranges, $suppress)
            : cut_chars($line, \@ranges);
        next unless defined $out;    # line suppressed by -s
        print $out, "\n";
    }
    return;
}

# cut_chars($line, \@ranges)
#
# Returns the concatenation of the character ranges of $line selected by
# @ranges (as produced by parse_range).  Ranges that start beyond the end
# of the line contribute nothing.
sub cut_chars {
    my ($line, $ranges) = @_;
    my $length = length $line;
    my $out    = '';
    foreach my $r (@$ranges) {
        my ($start, $end) = @$r;
        next if $start >= $length;
        # An undefined end means "to the end of the line".  The end must be
        # tested with defined(): an end index of 0 (list "1" or "-1") is a
        # real value, not an open-ended range.
        $out .= defined $end
            ? substr($line, $start, $end - $start + 1)
            : substr($line, $start);
    }
    return $out;
}

# cut_fields($line, $delim, \@ranges, $suppress)
#
# Splits $line on the literal string $delim and returns the fields
# selected by @ranges (as produced by parse_range), joined by $delim.
# A line containing no delimiter is returned unmodified, or, when
# $suppress is true, nothing (undef in scalar context) is returned.
sub cut_fields {
    my ($line, $delim, $ranges, $suppress) = @_;

    if (index($line, $delim) < 0) {
        return if $suppress;
        return $line;
    }

    # \Q..\E keeps delimiters such as '.' or '|' from acting as regex
    # metacharacters; the -1 limit keeps trailing empty fields.
    my @fields = split /\Q$delim\E/, $line, -1;

    my @out;
    foreach my $r (@$ranges) {
        my ($start, $end) = @$r;
        next if $start > $#fields;
        # clamp open-ended and over-long ranges to the last field
        $end = $#fields if !defined $end || $end > $#fields;
        push @out, @fields[$start .. $end];
    }
    return join $delim, @out;
}

# parse_range($list)
#
# Parses a comma separated list such as "1,3-5,-2,7-" and returns a list
# of [start, end] pairs holding zero-based, inclusive indexes.  The end
# is undef for an open-ended range ("N-").  Positions are numbered from
# 1, so 0 is rejected.  An empty or malformed list is reported on STDERR
# and terminates the program through usage_exit(1).
sub parse_range {
    my ($spec) = @_;

    unless (defined $spec && length $spec) {
        print STDERR "$0: empty range specification\n";
        usage_exit(1);
    }

    my @ranges;
    foreach my $item (split /,/, $spec) {
        my ($first, $last);
        my $valid = 1;
        if ($item =~ /^([0-9]+)\z/) {
            ($first, $last) = ($1, $1);
        }
        elsif ($item =~ /^([0-9]+)-\z/) {
            ($first, $last) = ($1, undef);
        }
        elsif ($item =~ /^([0-9]+)-([0-9]+)\z/) {
            ($first, $last) = ($1, $2);
        }
        elsif ($item =~ /^-([0-9]+)\z/) {
            ($first, $last) = (1, $1);
        }
        else {
            $valid = 0;
        }

        # position 0 does not exist, and a range may not run backwards
        $valid = 0 if $valid && ($first < 1 || (defined $last && $last < $first));

        unless ($valid) {
            print STDERR "$0: invalid byte or field list\n";
            usage_exit(1);
        }
        push @ranges, [$first - 1, defined $last ? $last - 1 : undef];
    }

    unless (@ranges) {    # e.g. a list consisting only of commas
        print STDERR "$0: invalid byte or field list\n";
        usage_exit(1);
    }
    return @ranges;
}

# usage_exit($status)
#
# Prints the usage summary on STDERR and exits with $status (1 when no
# status is given).
sub usage_exit {
    my ($status) = @_;
    print STDERR <<'EOT';
usage: cut -b list [-n] [file ...]
       cut -c list [file ...]
       cut -f list [-d delim] [-s] [file ...]
EOT
    exit(defined $status ? $status : 1);
}

1;

=head1 NAME

cut - select portions of each line of a file

=head1 SYNOPSIS

B<cut> -B<b> I<list> [-B<n>] [I<file> ...]

B<cut> -B<c> I<list> [I<file> ...]

B<cut> -B<f> I<list> [-B<d> I<delim>] [-B<s>] [I<file> ...]

=head1 DESCRIPTION

The B<cut> utility selects portions of each line (as specified by I<list>)
from each I<file> (or the standard input by default), and writes them to
the standard output.  The items specified by I<list> can be in terms of
column position or in terms of fields delimited by a special character.
Column numbering starts from 1.

I<list> is a comma separated set of increasing numbers and/or number
ranges.  Number ranges consist of a number, a dash (`-'), and a second
number and select the fields or columns from the first number to the
second, inclusive.  Numbers or number ranges may be preceded by a dash,
which selects all fields or columns from 1 to the first number.  Numbers
or number ranges may be followed by a dash, which selects all fields or
columns from the last number to the end of the line.  Numbers and number
ranges may be repeated, overlapping, and in any order.  It is not an
error to select fields or columns not present in the input line.

The options are as follows:

=over 4

=item -B<b> I<list>

The I<list> specifies byte positions.

=item -B<c> I<list>

The I<list> specifies character positions.

=item -B<d> I<delim>

Use the first character of I<delim> as the field delimiter character
instead of the tab character.

=item -B<f> I<list>

The I<list> specifies fields, delimited in the input by a single tab
character (or by the delimiter given with -B<d>).  Output fields are
separated by a single tab character (or by that same delimiter).

=item -B<n>

Do not split multi-byte characters.

=item -B<s>

Suppresses lines with no field delimiter characters.  Unless specified,
lines with no delimiters are passed through unmodified.

=back

The B<cut> utility exits 0 on success or 1 if an error occurred.

=head1 BUGS

Bytes and characters are treated identically.  No support is provided
for multi-byte character sets.

=head1 AUTHOR

Greg Hewgill E<lt>greg@hewgill.comE<gt> 1999-03-03

=head1 COPYRIGHT and LICENSE

This program is copyright by Greg Hewgill 1999.

This program is free and open software.  You may use, copy, modify,
distribute and sell this program (and any modified variants) in any way
you wish, provided you do not restrict others to do the same.

=cut