#!/usr/bin/env perl use strict; use warnings; use Text::CSV; use Encode qw(encode decode); my %PinYin; my $pinyin_file = 'pinyin.txt'; open my $fh, $pinyin_file or die "Can't open $pinyin_file for reading: $!\n"; while (<$fh>) { my ($han, $pinyin) = split / /, $_; $pinyin =~ s/\W//g; $han = decode('utf8', $han); next if $PinYin{$han}; $PinYin{$han} = $pinyin; } close $fh; sub to_pinyin { my $s = shift; $s =~ s/^\s+|\s+$//g; $s = decode('utf8', $s); if ($s !~ /\p{Han}/) { return $s; } my $o; for my $h (split //, $s) { #die $h; $o .= $PinYin{$h} || ''; } $o; } my $csv = Text::CSV->new({binary => 1}); while (<>) { s/\s*\|\s*$/\n/g; my @cols = split /\|/; my $count = grep { $_ } @cols; next if !$cols[0] or !$cols[1] or $count < 4; my $name = $cols[0]; my $pinyin = to_pinyin($name); $pinyin =~ s/zhangpojue/zhangxiaojue/; unshift @cols, $pinyin; print join(",", @cols); }