#!/usr/bin/perl

# UTF-8 tests for Text::CSV: fields containing wide characters must survive
# a combine ()/parse () round trip without the binary attribute being set,
# while an embedded newline must still be rejected without it.

use strict;
$^W = 1;

use Test::More;

BEGIN {
    if ($] < 5.008) {
        plan skip_all => "UTF8 tests useless in this ancient perl version";
    }
    else {
        # 1 (require_ok) + 7 characters * 8 tests + 3 (newline checks)
        # + 7 (UTF-8 file checks in the SKIP block) = 67
        plan tests => 67;
    }
}

BEGIN {
    # NOTE(review): presumably selects which Text::CSV backend is loaded;
    # confirm the meaning of the value 0 against the Text::CSV documentation.
    $ENV{PERL_TEXT_CSV} = 0;
    require_ok "Text::CSV";
    plan skip_all => "Cannot load Text::CSV" if $@;
    require "t/util.pl";    # is_binary () is expected to come from here
}

# No binary => 1, as UTF8 is supposed to be allowed without it
my $csv = Text::CSV->new ({
    always_quote   => 1,
    keep_meta_info => 1,
});

# Special characters to check:
#  0A = \n   2C = ,   20 = (space)   22 = "
#  0D = \r   3B = ;
foreach my $test (
    # Space-like characters
    [ "\x{0000A0}", "U+0000A0 NO-BRAK SPACE" ],
    [ "\x{00200B}", "U+00200B ZERO WIDTH SPACE" ],
    # Some characters with possible problems in the code point
    [ "\x{000122}", "U+000122 LATIN CAPITAL LETTER G WITH CEDILLA" ],
    [ "\x{002C22}", "U+002C22 GLAGOLITIC CAPITAL LETTER SPIDERY HA" ],
    [ "\x{000A2C}", "U+000A2C GURMUKHI LETTER BA" ],
    [ "\x{000E2C}", "U+000E2C THAI CHARACTER LO CHULA" ],
    [ "\x{010A2C}", "U+010A2C KHAROSHTHI LETTER VA" ],
    # Characters with possible problems in the encoded representation
    #  Should not be possible. ASCII is coded in 000..127, all other
    #  characters in 128..255
) {
    my ($u, $msg) = @$test;

    # Append a wide character and strip it again, so the remaining string
    # is marked as UTF8 even for code points below U+0100
    ($u = "$u\x{0123}") =~ s/.$//;

    my @in  = ("", " ", $u, "");
    my $exp = join ",", map { qq{"$_"} } @in;    # always_quote output

    ok ($csv->combine (@in), "combine $msg");

    my $str = $csv->string;
    is_binary ($str, $exp, "string $msg");

    ok ($csv->parse ($str), "parse $msg");
    my @out = $csv->fields;

    # Cannot use is_deeply (), because of the binary content
    is (scalar @in, scalar @out, "fields $msg");
    for (0 .. $#in) {
        is_binary ($in[$_], $out[$_], "field $_ $msg");
    }
}

# Test if the UTF8 part is accepted, but the \n is not
is ($csv->parse (qq{"\x{0123}\n\x{20ac}"}), 0, "\\n still needs binary");
is ($csv->binary, 0, "bin flag still unset");
is ($csv->error_diag + 0, 2021, "Error 2021");

# As all utf tests are skipped for older perls, it's safe to use 3-arg open
# this way
my $file = "files/utf8.csv";
SKIP: {
    # The skip count must equal the number of tests in this block (7),
    # otherwise a missing test file breaks the plan of 67 tests.
    open my $fh, "<:encoding(utf8)", $file
        or skip "Cannot open UTF-8 test file", 7;

    my $row;
    ok ($row = $csv->getline ($fh), "read/parse");

    is ($csv->is_quoted (0), 1, "First field is quoted");
    is ($csv->is_quoted (1), 0, "Second field is not quoted");
    is ($csv->is_binary (0), 1, "First field is binary");
    is ($csv->is_binary (1), 0, "Second field is not binary");

    ok (utf8::valid ($row->[0]), "First field is valid utf8");

    $csv->combine (@$row);
    ok (utf8::valid ($csv->string), "Combined string is valid utf8");

    close $fh;
}