#! /usr/bin/perl
#---------------------------------------------------------------------
# 20-open.t
# Copyright 2012 Christopher J. Madsen
#
# Actually open files and check the encoding
#---------------------------------------------------------------------

use strict;
use warnings;

use Test::More 0.88;

# NOTE(review): test() as it survives in this copy makes 71 assertions
# over the 14 calls below, so this plan expects exactly one more
# assertion per call.  Presumably that assertion was lost together with
# the damaged like() statement inside test() -- restore it from the
# upstream distribution rather than lowering the plan.
plan tests => 85;

use IO::HTML;
use File::Temp;
use Scalar::Util 'blessed';

#---------------------------------------------------------------------
# test($expected, $out, $data, [\%options,] [$name])
#
# Writes $data both to a temporary file and to an in-memory buffer,
# then checks what IO::HTML::file_and_encoding (on the file) and
# IO::HTML::sniff_encoding (on the buffer) report.
#
#   $expected  Encoding name sniff_encoding should return.  When it is
#              undef, sniff_encoding is expected to return undef and
#              file_and_encoding is expected to report 'cp1252'.
#   $out       Layer used when writing $data.  A value that does not
#              start with ':' is wrapped as ":encoding($out)"; a false
#              value leaves both handles without an explicit layer.
#   $data      The document text to write.
#   \%options  Optional hashref passed on to IO::HTML.  Its 'encoding'
#              key is deleted and then set to 1 while testing
#              sniff_encoding, so the caller's hash is modified.
#   $name      Optional test description; defaults to
#              'test ' . ($expected || 'cp1252').
#
# Dies if any of the scratch handles cannot be set up, written or
# closed.
sub test {
    my ($expected, $out, $data, $name, $nextArg) = @_;

    # Report failures at the caller's line, not inside this helper.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    # The options hashref, when present, takes the place of $name and
    # pushes the description to the following argument.
    my $options;
    if (ref $name) {
        $options = $name;
        $name    = $nextArg;
    }

    unless ($name) {
        $name = 'test ' . ($expected || 'cp1252');
    }

    my $tmp = File::Temp->new(UNLINK => 1);
    open(my $mem, '>', \(my $buf))
        or die "Can't open in-memory file for writing: $!";

    if ($out) {
        $out = ":encoding($out)" unless $out =~ /^:/;
        binmode $tmp, $out or die "Can't set $out on temporary file: $!";
        binmode $mem, $out or die "Can't set $out on in-memory file: $!";
    }

    print {$mem} $data or die "Can't write to in-memory file: $!";
    print {$tmp} $data or die "Can't write to temporary file: $!";
    close $mem  or die "Can't close in-memory file: $!";
    $tmp->close or die "Can't close temporary file: $!";

    # "$tmp" stringifies the File::Temp object to its filename.
    my ($fh, $encoding, $bom) =
        IO::HTML::file_and_encoding("$tmp", $options);

    if ($options and $options->{encoding}) {
        ok(blessed($encoding), 'returned an object');

        $encoding = eval { $encoding->name };
    }

    is($encoding, $expected || 'cp1252', $name);

    my $firstLine = <$fh>;

    # NOTE(review): in this copy the statement survives only as
    #   like($firstLine, qr/^, $firstLine);
    # i.e. the pattern is cut off right after '^' and the text up to a
    # later ", $firstLine);" is missing.  The '<html' pattern below is a
    # reconstruction -- confirm it against the upstream distribution and
    # restore the assertion that ended in ", $firstLine);".
    like($firstLine, qr/^<html/i, "$name: first line");

    close $fh;

    # Test sniff_encoding:
    undef $mem;
    open($mem, '<', \$buf) or die "Can't open in-memory file: $!";

    # First pass: ask for the encoding as a plain name.
    delete $options->{encoding} if $options;

    ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);

    is($encoding, $expected, "$name: sniff_encoding name");

    seek $mem, 0, 0 or die "Can't rewind in-memory file: $!";

    # Second pass: ask for an object (autovivifies $options if needed).
    $options->{encoding} = 1;

    ($encoding, $bom) = IO::HTML::sniff_encoding($mem, undef, $options);

    if (defined $expected) {
        ok(blessed($encoding), 'encoding is an object');

        is(eval { $encoding->name }, $expected,
           "$name: sniff_encoding object name");
    }
    else {
        is($encoding, undef, "$name: sniff_encoding finds nothing");
    }
} # end test

#---------------------------------------------------------------------
# NOTE(review): the fixtures below appear to have lost their HTML markup
# (several heredoc bodies are empty, the rest hold only bare text).
# They are reproduced exactly as found; restore the original markup from
# the upstream distribution before relying on this file.
#
# Each <<'' / <<"" heredoc ends at the first empty line, so the blank
# line after every body is significant.

test 'utf-8-strict' => '' => <<'';

test 'utf-8-strict' => ':utf8' => <<"";
Foo\xA0Bar

test undef, latin1 => <<"";
Foo\xA0Bar

test 'UTF-16BE' => 'UTF-16BE' => <<"";
\x{FeFF}Foo\xA0Bar

test 'utf-8-strict' => ':utf8' => <<"";
\x{FeFF}

test 'utf-8-strict' => ':utf8' => <<"";

test 'UTF-16LE' => 'UTF-16LE' => <<"";
\x{FeFF}

test 'UTF-16LE' => 'UTF-16LE' => <<"", { encoding => 1 };
\x{FeFF}

test 'utf-8-strict' => ':utf8' => <<"", { encoding => 1, need_pragma => 0 };

test 'utf-8-strict' => ':utf8' =>
    "Foo\xA0Bar" . ("\x{2014}" x 512) . "\n",
    'UTF-8 character crosses boundary';

test 'utf-8-strict' => ':utf8' =>
    "Foo Bar" . ("\x{2014}" x 512) . "\n",
    'UTF-8 character crosses boundary 2';

test undef, '', <<'', 'wrong pragma';
Title

test 'utf-8-strict', '', <<'', {need_pragma => 0}, 'need_pragma 0';
Title

test 'iso-8859-15', '', <<"", { encoding => 1, need_pragma => 0 };

done_testing;