t/50_utf8.t - metacpan.org

#!/usr/bin/perl

use strict;
$^W = 1;

use Test::More;

BEGIN {
    if ($] < 5.008) {
	plan skip_all => "UTF8 tests useless in this ancient perl version";
	}
    else {
	plan tests => 67;
	}
    }

BEGIN {
    $ENV{PERL_TEXT_CSV} = 0;
    require_ok "Text::CSV";
    plan skip_all => "Cannot load Text::CSV" if $@;
    require "t/util.pl";
    }

# No binary => 1, as UTF8 is supposed to be allowed without it
my $csv = Text::CSV->new ({
    always_quote   => 1,
    keep_meta_info => 1,
    });

# Special characters to check:
# 0A = \n  2C = ,  20 =     22 = "  
# 0D = \r  3B = ;
foreach my $test (
  # Space-like characters
  [ "\x{0000A0}", "U+0000A0 NO-BRAK SPACE"				],
  [ "\x{00200B}", "U+00200B ZERO WIDTH SPACE"				],
  # Some characters with possible problems in the code point
  [ "\x{000122}", "U+000122 LATIN CAPITAL LETTER G WITH CEDILLA"	],
  [ "\x{002C22}", "U+002C22 GLAGOLITIC CAPITAL LETTER SPIDERY HA"	],
  [ "\x{000A2C}", "U+000A2C GURMUKHI LETTER BA"				],
  [ "\x{000E2C}", "U+000E2C THAI CHARACTER LO CHULA"			],
  [ "\x{010A2C}", "U+010A2C KHAROSHTHI LETTER VA"			],
  # Characters with possible problems in the encoded representation
  #  Should not be possible. ASCII is coded in 000..127, all other
  #  characters in 128..255
  ) {
    my ($u, $msg) = @$test;
    ($u = "$u\x{0123}") =~ s/.$//;	# Make sure it's marked UTF8
    my @in  = ("", " ", $u, "");
    my $exp = join ",", map { qq{"$_"} } @in;

    ok ($csv->combine (@in),		"combine $msg");

    my $str = $csv->string;
    is_binary ($str, $exp,		"string  $msg");

    ok ($csv->parse ($str),		"parse   $msg");
    my @out = $csv->fields;
    # Cannot use is_deeply (), because of the binary content
    is (scalar @in, scalar @out,	"fields  $msg");
    for (0 .. $#in) {
	is_binary ($in[$_], $out[$_],	"field $_ $msg");
	}
    }

# Test if the UTF8 part is accepted, but the \n is not
is ($csv->parse (qq{"\x{0123}\n\x{20ac}"}), 0, "\\n still needs binary");
is ($csv->binary, 0, "bin flag still unset");
is ($csv->error_diag + 0, 2021, "Error 2021");

# As all utf tests are skipped for older pers, It's safe to use 3-arg open this way
my $file = "files/utf8.csv";
SKIP: {
    open my $fh, "<:encoding(utf8)", $file or
	skip "Cannot open UTF-8 test file", 6;

    my $row;
    ok ($row = $csv->getline ($fh), "read/parse");

    is ($csv->is_quoted (0),	1,	"First  field is quoted");
    is ($csv->is_quoted (1),	0,	"Second field is not quoted");
    is ($csv->is_binary (0),	1,	"First  field is binary");
    is ($csv->is_binary (1),	0,	"Second field is not binary");

    ok (utf8::valid ($row->[0]),	"First field is valid utf8");

    $csv->combine (@$row);
    ok (utf8::valid ($csv->string),	"Combined string is valid utf8");
    }

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)