#!/usr/bin/env perl
#
# Exercises isUTF8(), a small C routine compiled through Inline::C that
# reports whether a byte buffer is well-formed UTF-8, against a table of
# known-valid and known-invalid byte sequences.
#
# NOTE(review): in the copy of this file that was reviewed, the heredoc
# opener and the head of isUTF8() (signature, loop header and the F0 arm of
# the 4-byte test) were missing, and the F1..F3 comparison read
# "< 0xf1 ... < 0xf3", contradicting its own comment.  Those parts were
# reconstructed from the call site, the surviving conditions and their
# comments -- confirm against the original source.

use strict;
use warnings;

use Test::More;

use Inline C => <<'END_C';
/*
 * Return 1 when the first len bytes at y form well-formed UTF-8, else 0.
 *
 * The second byte of each multi-byte sequence is range-checked per lead
 * byte, which rejects overlong encodings (C0/C1, E0 80..9F, F0 80..8F),
 * UTF-16 surrogates (ED A0..BF) and code points above U+10FFFF
 * (F4 90..BF, F5..FF).  A sequence cut short by the end of the buffer is
 * rejected as well: no multi-byte arm matches, so the lead byte falls
 * through to the final high-bit check.
 */
int isUTF8(unsigned char *y, int len) {
    int i;

    for (i = 0; i < len; i++) {
        if ( i+3 < len &&                                   /* 4-byte: byte 1 and 2 */
             (( y[i] == 0xf0 && y[i+1] >= 0x90 && y[i+1] <= 0xbf ) ||   /* b1 == F0 */
              ( y[i] >= 0xf1 && y[i] <= 0xf3 &&
                (y[i+1] & 192) == 128 ) ||                  /* b1 >= F1 and <= F3 */
              ( y[i] == 0xf4 && y[i+1] >= 0x80 && y[i+1] <= 0x8f )) &&  /* b1 == F4 */
             ( y[i+2] & 192 ) == 128 &&                     /* byte 3 */
             ( y[i+3] & 192 ) == 128                        /* byte 4 */
           ) {
            i += 3;     /* skip the continuation bytes; the loop adds the 4th step */
        }
        else if ( i+2 < len &&                              /* 3-byte: byte 1 and 2 */
                  ( ( y[i] == 0xe0 && (y[i+1] & 224) == 160 ) ||        /* b1 == E0 */
                    ( y[i] == 0xed && (y[i+1] & 224) == 128 ) ||        /* b1 == ED */
                    ( ((y[i] >= 0xe1 && y[i] <= 0xec) ||
                       y[i] == 0xee || y[i] == 0xef)        /* b1 >= E1 and <= EC or == EE or == EF */
                      && (y[i+1] & 192) == 128 ) ) &&
                  (y[i+2] & 192) == 128 ) {                 /* byte 3 */
            i += 2;
        }
        else if ( i+1 < len &&                              /* 2-byte: byte 1 */
                  y[i] >= 0xc2 && y[i] <= 0xdf &&           /* b1 >= C2 and <= DF */
                  (y[i+1] & 192) == 128 ) {                 /* byte 2 */
            i += 1;
        }
        else if ( (y[i] & 128) != 0 ) {
            /* High bit set but no multi-byte pattern matched: malformed. */
            return 0;
        }
        /* Otherwise a plain 7-bit ASCII byte: nothing to skip. */
    }
    return 1;
}
END_C

# Byte sequence => expected isUTF8() result (1 = well-formed, 0 = malformed).
my %valid = (
    "abc123"           => 1,
    "\xc0\x81"         => 0,    # overlong 2-byte lead
    "\xc1\xa0"         => 0,    # overlong 2-byte lead
    "\xc2\x81"         => 1,
    "\xdf\x80"         => 1,
    "\xdf\xc0"         => 0,    # second byte is not a continuation byte
    "\xe0\x80"         => 0,    # truncated 3-byte sequence
    "\xe0\x81\x80"     => 0,    # overlong 3-byte encoding
    "\xe0\xa0\x80"     => 1,
    "\xed\xa0\x80"     => 0,    # UTF-16 surrogate
    "\xee\x81\x81"     => 1,
    "\xe9a"            => 0,    # lead byte followed by ASCII
    "\xf0\x8f\x80\x80" => 0,    # overlong 4-byte encoding
    "\xf0\x90\xbe\xbf" => 1,
    "\xf1\x80\x80\x80" => 1,    # lower edge of the F1..F3 lead range
    "\xf2\x79\x80\x80" => 0,    # second byte is not a continuation byte
    "\xf3\xbf\xbf\xbf" => 1,    # upper edge of the F1..F3 lead range
    "\xf4\x8f\xbf\x80" => 1,
    "\xf4\x90\x80\x80" => 0,    # above U+10FFFF
);

for my $k (sort keys %valid) {
    # Round-trip through pack/unpack so the C side is handed plain octets.
    my $bytes  = pack('C*', unpack('C*', $k));
    my $length = length($bytes);
    my $got    = isUTF8($bytes, $length);

    my $hex  = join ' ', map { sprintf( "%x", ord($_) ) } (split(//, $bytes));
    my $vStr = ($valid{$k}) ? 'valid' : 'invalid';

    is( $got, $valid{$k}, "utf8-test: $hex ($vStr)" );
}

done_testing();