#!/usr/bin/perl use strict; use warnings; use lib qw( t/lib ); use Test::More; use Test::Framework; use Fcntl qw( :seek ); use File::BOM qw( %enc2bom ); # Expected data for "moose" tests (below) our %should_be = ( 'UTF-8' => "\x{ef}\x{bb}\x{bf}m\x{c3}\x{b8}\x{c3}\x{b8}se\x{e2}\x{80}\x{a6}", 'UTF-16BE' => "\x{fe}\x{ff}\x{0}m\x{0}\x{f8}\x{0}\x{f8}\x{0}s\x{0}e &", 'UTF-16LE' => "\x{ff}\x{fe}m\x{0}\x{f8}\x{0}\x{f8}\x{0}s\x{0}e\x{0}& ", 'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}\x{0}\x{0}\x{0}m\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}s\x{0}\x{0}\x{0}e\x{0}\x{0} &", 'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}m\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}\x{f8}\x{0}\x{0}\x{0}s\x{0}\x{0}\x{0}e\x{0}\x{0}\x{0}& \x{0}\x{0}", ); plan tests => 2 * @test_files + 6 * keys(%enc2bom) + keys(%should_be) + 2; # Work around bug in older PerlIO::via # The PerlIO::via version number was not incremented when the bug was fixed. my $compat = $] >= 5.008007 ? '' : ':utf8'; # Ignore known harmless warning local $SIG{__WARN__} = sub { my $warning = "@_"; if ($warning !~ /^UTF-(?:16|32)LE:Partial character/) { warn $warning; } }; for my $test_file (@test_files) { ok( open(FH, "<:via(File::BOM)$compat", $file2path{$test_file}), "$test_file: opened through layer" ) or diag "$test_file: $!"; my $line = ; chomp $line; is($line, $filecontent{$test_file}, "$test_file: read OK through layer") or diag("HEX: ".hexdump($line)); close FH; } for my $enc (sort keys %enc2bom) { my $file = "test_file-$enc.txt"; ok( open(BOM_OUT, ">:encoding($enc):via(File::BOM)$compat", $file), "Opened file for writing $enc via layer" ) or diag "$file: $!"; my $line_one = "Unicode text\x{2026}"; my $test = print(BOM_OUT "$line_one\n"); ok($test, 'print() through layer') or diag("print() returned ". (defined($test)?$test:'undef')); my $line_two = "\x{62cd}\x{8ce3}"; $test = print(BOM_OUT "$line_two\n"); ok($test, 'print() through layer again') or diag("print() returned ". (defined($test)?$test:'undef')); close BOM_OUT; # check BOM if (open my $fh, '<:bytes', $file) { read $fh, my $sample, $File::BOM::MAX_BOM_LENGTH; like($sample, qr/^\Q$enc2bom{$enc}/, "BOM written correctly"); close $fh; } else { diag "Couldn't open $file: $!"; fail(1); } # now re-read my $line; open(BOM_IN, "<:via(File::BOM)$compat", $file); $line = ; chomp $line; is($line, $line_one, 'BOM was written successfully via layer'); $line = ; chomp $line; is($line, $line_two, 'BOM not written in second print call'); close BOM_IN; unlink $file or diag "Couldn't remove $file: $!"; } # Mark Fowler's "moose" test: { # This is 'moose...' (with slashes in the 'o's them, and the '...' # as one char). As the '...' can't be represented in latin-1 then # perl will store the thing internally as a utf8 string with the # utf8 flag enabled. my $moose = "m\x{f8}\x{f8}se\x{2026}"; for my $enc (keys %should_be) { my $file = "moose-$enc.txt"; open(FH, ">:encoding($enc):via(File::BOM)$compat", $file) or die "Can't write to $file: $!\n"; print FH $moose; close FH; open(FH, '<', $file) or die "Can't read $file: $!\n"; local $/ = undef; my $value = ; close FH; is( reasciify($value), reasciify($should_be{$enc}), "check file for $enc" ); unlink $file or diag "Can't remove '$file': $!"; } } # Spurkis' seek test { use utf8; my $file = 't/data/utf8_data.csv'; open my $fh, '>:utf8', $file or die "Can't write $file: $!"; print $fh <<"END_DATA"; \x{feff}id,street,town,pc,country,english,french,chinese,arabic '10,"écoles",zoom,12,france,auctions,"Enchères","拍賣","مزاد" END_DATA open $fh, '<:via(File::BOM)', $file or die "Can't read $file: $!\n"; my $first_line = <$fh>; my $pos = tell($fh); # position of second line my $rest = join('', <$fh>); seek($fh, 0, SEEK_SET) or die "Couldn't seek: $!"; my $new_first_line = <$fh>; seek($fh, $pos, SEEK_SET) or die "Couldn't seek: $!"; my $new_rest = join('', <$fh>); is($new_first_line, $first_line, "seek() works"); is($new_rest, $rest, "tell() works") or diag "Position was $pos"; close $fh; unlink $file or warn "Couldn't remove $file: $!\n"; } # sub for moose test sub reasciify { my $string = shift; $string = join "", map { my $ord = ord($_); ($ord > 127 || ($ord < 32 && $ord != 10)) ? sprintf '\x{%x}', $ord : $_ } split //, $string } __END__ vim: ft=perl