#!/usr/bin/env perl
#
# Filter that turns a GBK-encoded HTML page into plain UTF-8 text.
#
# Input : the file(s) named on the command line, or STDIN (via <>).
# Output: the converted text on STDOUT.
#
# NOTE(review): this file had been collapsed onto the "#!" line (which makes
# Perl treat the whole program as a comment) and several regex patterns had
# lost their HTML entities / tags.  The patterns marked "reconstructed" below
# are the most plausible originals -- please confirm against a pristine copy.

use strict;
use warnings;
use Encode qw(from_to);

# Slurp the whole input in one read ($/ undefined => no record separator).
my $html = do { local $/; <> };
$html = '' unless defined $html;    # no input at all: behave as empty page

# from_to() transcodes the buffer in place: GBK octets in, UTF-8 octets out.
from_to($html, 'gbk', 'utf8');

# Decode the few entities this script cares about.  "&amp;" is handled last
# so that text such as "&amp;lt;" ends up as the literal "&lt;" rather than
# being decoded twice.
$html =~ s/&nbsp;/ /g;              # reconstructed: entity was lost
$html =~ s/&lt;/</g;                # reconstructed: entity was lost
$html =~ s/&gt;/>/g;                # reconstructed: statement was lost
$html =~ s/&amp;/&/g;               # reconstructed: entity was lost

# Collapse every run of whitespace (including newlines) to a single space,
# so the only newlines in the output are the ones inserted below.
$html =~ s{\s+}{ }g;

# Table structure -> delimited text.
# NOTE(review): both tag patterns were stripped from the file; only the
# replacements ("|" and "\n") and the /i flag survived.  Presumably a cell
# boundary and a row boundary -- confirm the exact tags.
$html =~ s{</td>}{|}gi;             # reconstructed: end of cell -> "|"
$html =~ s{</tr>}{\n}gi;            # reconstructed: end of row  -> newline

# Drop every remaining tag.
$html =~ s{<[^>]+>}{}g;             # reconstructed: leading "<[^>" was lost

# NOTE(review): the original had one more substitution here, of the form
# s{...}{}sg, whose pattern is unrecoverable.  It is deliberately NOT kept as
# s{}{}sg, because an empty pattern makes Perl reuse the last successfully
# matched regex.  Restore the intended pattern if it is known.

# Finally remove every space character that is left.
$html =~ s/ +//g;

print $html;