#!/usr/bin/perl use strict; use warnings; use Encode 1.41; use File::Spec; use File::Basename; my $path = dirname($0); conv(File::Spec->catdir($path, 'b2g_map.utf8') => 'trad-simp'); conv(File::Spec->catdir($path, 'g2b_map.utf8') => 'simp-trad'); sub conv { my ($src, $target) = @_; my %count; my @has; open IN, '<:utf8', $src or die $!; open OUT, ">$target.ucm" or die $!; print OUT << "."; # This is generated from $src -- please change that file instead. # Yes, this .ucm map is not round-trip safe; HanConvert is a lossy operation. "$target" . print OUT +HEADER(); ; ; while () { my ($fchar, $tchar) = m/^(.) (.)/; print OUT ucm_entry($fchar, $tchar); $has[ord $fchar] = 1; } close IN; open IN, File::Spec->catdir($path, 'DerivedAge.txt') or die $!; while() { next if / %s |%u\n", ord($tchar), join('', map sprintf('\\x%02X', ord($_)), split('', $utf8)), 0); # XXX - suggestions welcome to the fallback char here } use constant HEADER => << '.'; 1 2 \x3F # CHARMAP . use constant FOOTER => << '.'; END CHARMAP .
#
# NOTE(review): this file appears to be a multi-line Perl script whose line
# breaks were lost.  As stored, every statement sits on the shebang line,
# which Perl reads as a comment, and the `<< "."` / `<< '.'` here-documents
# cannot terminate without lines of their own.  Restore it from the upstream
# copy rather than hand-editing -- some text is also missing (see below).
#
# What the visible code does:
#   * conv($src, $target) is called twice: for b2g_map.utf8 => 'trad-simp'
#     and g2b_map.utf8 => 'simp-trad', both looked up next to the script
#     via dirname($0).
#   * conv opens $src with the :utf8 layer and opens "$target.ucm" for
#     writing, then prints a "generated from $src" banner and HEADER().
#   * Inside the first loop, m/^(.) (.)/ captures two characters separated
#     by one space into ($fchar, $tchar); ucm_entry($fchar, $tchar) is
#     printed to OUT and $has[ord $fchar] is set to 1.
#   * DerivedAge.txt is then opened from the same directory; what is done
#     with its contents is not visible here.
#   * The surviving sprintf tail formats ord($tchar), then each character of
#     $utf8 rendered as a \xNN escape, then 0 for the trailing %u field.
#
# Apparent damage -- confirm each point against the original script:
#   * Both `while ()` loops have an empty condition; presumably a <IN>
#     readline was stripped together with other <...> text.
#   * Everything between `next if /` and ` %s |%u\n"` is gone, including the
#     end of conv and the `sub ucm_entry` header ($utf8 is never assigned
#     and FOOTER is never used in the visible text).
#   * The here-document bodies (`"$target"`, `1 2 \x3F`) look as if they
#     lost leading <...> keywords -- TODO confirm against the .ucm format.
#   * %count is declared but never used in the visible portion.