#!/usr/bin/perl # Combine TAB and original ucm into enhanced ucm # .Tab + UCM -> UCM # .Tab Unicode -> Encoding # .Ucm Encoding -> Unicode, Original $VERSION = "0.30"; use Encode qw( decode ); &tab2ucm("gb2312-simp", "NJUC2GB.TAB", "euc-cn.ucm", "gb2312-add.dat", "gbk"); &tab2ucm("big5-trad", "NJUC2B5.TAB", "cp950.ucm", "", "big5"); sub tab2ucm{ my ($ucmname, $tabfile, $ucmorg, $patchfile, $encode_from)=@_; my $ucmdst="$ucmname.ucm"; #------------------Read TAB file my $buf=""; sysopen R, $tabfile, 0; sysread R, $buf, 65536*2; close R; #------------------Parse original UCM file my %e2u={}; open RUCM, $ucmorg; while() { chomp; next if !( /^ \\x(..)\\x(..)( \|(.)|)/ ); $ucode_h=$1; $encode_low=$2; $encode_high=$3; $skip_flag=$5; $encode_h=$encode_low.$encode_high; #print "$_ = $ucode_h $encode_low $encode_high $encode_h $skip_flag\n"; next if not($skip_flag==0 or $skip_flag==3 or $skip_flag eq ''); $e2u{$encode_h}=$ucode_h; } close RUCM; #------------------Parse TAB info, ignore duplicated encoding for($i=0, $ucode=0; $i:unix:utf8", "$ucmdst"; use POSIX qw(strftime); $curtime=localtime; print W < "$ucmname" 1 2 \\x3F CHARMAP EOSTART for($i=0, $ucode=0; $i \\x%02X", $ucode, $encode_low; printf W "\\x%02X", $encode_high if $encode_high>0; printf W " |%d", $skip_flag; printf W " # %s", decode($encode_from, $encode) if $encode_low>127 and $encode_high>=32; print W "\n"; } #------------------Read Patch file if($patchfile ne '') { open PATCH, "<:encoding($encode_from)", $patchfile; while() { chomp; next if $_ eq ''; print W $_."\n"; } close PATCH; } print W "END CHARMAP\n"; close W; }
#
# ---------------------------------------------------------------------------
# Review notes for the script text on the line above
# ---------------------------------------------------------------------------
#
# Purpose (per its own header comment): combine a .TAB table with an original
# .ucm file into an "enhanced" .ucm file.
#
# Top level:
#   Calls tab2ucm twice, once for "gb2312-simp" (NJUC2GB.TAB, euc-cn.ucm,
#   patch file gb2312-add.dat, decode name "gbk") and once for "big5-trad"
#   (NJUC2B5.TAB, cp950.ucm, no patch file, decode name "big5").
#
# tab2ucm($ucmname, $tabfile, $ucmorg, $patchfile, $encode_from)
#   $ucmname     - base name; the output file is "$ucmname.ucm".
#   $tabfile     - file read raw with sysopen (mode 0) / sysread, at most
#                  65536*2 bytes, into $buf.
#   $ucmorg      - existing .ucm file; each matching line stores
#                  $e2u{$encode_low.$encode_high} = $ucode_h, and lines whose
#                  flag is not 0, 3 or empty are skipped.
#   $patchfile   - optional; when not '', it is opened with the
#                  ":encoding($encode_from)" layer and every non-empty line
#                  is copied to the output just before "END CHARMAP".
#   $encode_from - encoding name passed to Encode::decode for the trailing
#                  "# <char>" comment on each output line, and used as the
#                  patch file's input layer.
#   Return value: none is set explicitly; the result is the written file.
#
# NOTE(review): the script text as stored here looks damaged and should be
# restored from a clean copy before it is run or edited:
#   * All statements sit on one physical line, so everything after the first
#     '#' is a comment to perl in this form.
#   * Both "while()" loops have no filehandle; presumably they read RUCM and
#     PATCH respectively - confirm against the original.
#   * Both "for($i=0, $ucode=0; $i..." headers are cut off. The first one runs
#     directly into ':unix:utf8", "$ucmdst";', so the loop condition, the
#     whole TAB-parsing loop body and the start of the open on handle W are
#     missing.
#   * 'print W < "$ucmname" 1 2 \\x3F CHARMAP EOSTART' looks like the remains
#     of a here-document terminated by EOSTART; its opener and any text in
#     angle brackets are missing.
#   * The UCM regex shows four capture groups but the code reads $5, and the
#     names ($ucode_h=$1, $encode_low=$2, $encode_high=$3) do not line up with
#     the visible groups; presumably a leading capture for the Unicode code
#     point was lost - confirm.
#
# NOTE(review): hazards visible in the surviving code (not changed here):
#   * "my %e2u={};" assigns a hash reference to a hash, which creates one
#     bogus key (the stringified reference) instead of an empty hash;
#     "my %e2u;" or "my %e2u=();" is the usual form.
#   * None of the sysopen/open calls check for failure, so a missing input
#     file is silently treated as empty input.
#   * "$skip_flag==0" compares numerically; when the flag is empty or
#     undefined this is true (with a warning under "use warnings"), so the
#     "eq ''" alternative is effectively redundant.
#   * No "use strict" / "use warnings"; $i, $ucode, $ucode_h, $encode_low,
#     $encode_high, $encode_h, $skip_flag and $curtime are package globals.
#   * POSIX strftime is imported, but the visible text only uses localtime;
#     it may be used in the missing here-document - confirm.