package Syntax::Highlight::Engine::Simple;
use warnings;
use strict;
use Carp;
use UNIVERSAL::require;
#use version;
our $VERSION = '0.05';
### ----------------------------------------------------------------------------
### constructor
### ----------------------------------------------------------------------------
### Build a highlighter object.
###
### Named arguments:
###   type   - optional suffix of a Syntax::Highlight::Engine::Simple::*
###            sub class; when true, that class is loaded and its
###            setSyntax() is applied to the new object.
###   syntax - optional array ref of rules; used when no type is given.
###   html_escape_code_ref - passed on to setParams().
### Croaks with the loader's error when the type class cannot be loaded.
sub new {
    my $class = shift;
    my $self  = bless {type => undef, syntax => undef, @_}, $class;
    $self->setParams(@_);

    if ($self->{type}) {
        my $type_class = "Syntax::Highlight::Engine::Simple::". $self->{type};
        $type_class->require or croak $@;
        ### Invoke the sub class's setSyntax as a plain function on this object
        no strict 'refs';
        &{$type_class. "::setSyntax"}($self);
        return $self;
    }

    ### Keep the rules handed to the constructor. An unconditional
    ### setSyntax() call here would replace them with an empty rule set.
    if (defined $self->{syntax}) {
        $self->setSyntax(syntax => $self->{syntax});
    }
    else {
        $self->setSyntax();
    }
    return $self;
}
### ----------------------------------------------------------------------------
### set params
### ----------------------------------------------------------------------------
### Store the configurable parameters on the object.
### Named argument:
###   html_escape_code_ref - escape routine kept in the object; defaults to
###                          this package's _html_escape.
### Returns the stored value.
sub setParams {
    my ($self, @pairs) = @_;
    my %option = (html_escape_code_ref => \&_html_escape, @pairs);
    return $self->{html_escape_code_ref} = $option{html_escape_code_ref};
}
### ----------------------------------------------------------------------------
### set syntax
### ----------------------------------------------------------------------------
### Replace the whole rule set.
### Named argument:
###   syntax - array ref of rule hashes; an empty array ref when omitted.
### Returns the stored value.
sub setSyntax {
    my ($self, @pairs) = @_;
    my %option = (syntax => [], @pairs);
    return $self->{syntax} = $option{syntax};
}
### ----------------------------------------------------------------------------
### append syntax
### ----------------------------------------------------------------------------
### Add one rule to the end of the rule set.
### Named argument:
###   syntax - rule hash ref; when omitted, a blank rule (empty regexp and
###            class, undefined container) is appended instead.
### Returns what push returns, i.e. the new number of rules.
sub appendSyntax {
    my ($self, @pairs) = @_;
    my %blank_rule = (regexp => '', class => '', container => undef);
    my %option     = (syntax => \%blank_rule, @pairs);
    return push @{ $self->{syntax} }, $option{syntax};
}
### ----------------------------------------------------------------------------
### Highlight multi Line
### ----------------------------------------------------------------------------
### Highlight a string.
###
### Named arguments:
###   str       - text to highlight (default '').
###   tab_width - when greater than 0, tabs are expanded to spaces with this
###               tab stop width before highlighting; -1 (default) disables.
### Returns the marked-up text produced by _doLine.
### Croaks when str is undefined.
sub doStr {
    my $self = shift;
    my %args = (str => '', tab_width => -1, @_);
    defined $args{str} or croak 'doStr method got undefined value';

    if ($args{tab_width} > 0) {
        ### A limit of -1 keeps trailing empty fields, so blank lines at the
        ### end survive, and join() adds no newline the input did not have.
        my @lines = split(/\r\n|\r|\n/, $args{str}, -1);
        $args{str} = join(
            "\n",
            map { _tab2space(str => $_, tab_width => $args{tab_width}) } @lines
        );
    }
    return $self->_doLine(str => $args{str});
}
### ----------------------------------------------------------------------------
### Highlight file
### ----------------------------------------------------------------------------
### Highlight the content of a file.
###
### Named arguments:
###   file      - path of the file to read.
###   tab_width - when greater than 0, tabs are expanded to spaces with this
###               tab stop width; -1 (default) disables.
###   encode    - encoding name used for the input layer (default 'utf8').
### Returns the marked-up text produced by _doLine.
### Croaks when the file cannot be opened.
sub doFile {
    my $self = shift;
    my %args = (
        file      => '',
        tab_width => -1,
        encode    => 'utf8',
        @_);

    ### Three-argument open: the file name is never parsed for a mode, so
    ### names starting with '>', '|' or whitespace are taken literally.
    open(my $filehandle, '<', $args{file})
        or croak "File open failed: $args{file}: $!";
    binmode($filehandle, ":encoding($args{encode})");

    my $str = '';
    while (my $line = <$filehandle>) {
        if ($args{tab_width} > 0) {
            $line = _tab2space(str => $line, tab_width => $args{tab_width});
        }
        $str .= $line;
    }
    close($filehandle);
    return $self->_doLine(str => $str);
}
### ----------------------------------------------------------------------------
### Highlight single line
### ----------------------------------------------------------------------------
### Highlight a chunk of text.
###
### Named argument:
###   str - text to highlight (default '').
### Line ends are normalised to "\n", every rule in $self->{syntax} is
### mapped onto the text, and the text is returned with each kept match
### wrapped in <span class='CLASS'>...</span>. All text outside the tags is
### passed through $self->{html_escape_code_ref}.
### Side effect: $self->{_markup_map} is rebuilt.
sub _doLine {
    my $self = shift;
    my %args = (str => '', @_);

    ### Escape one text fragment. Symbolic references stay enabled here,
    ### as in the original calls, so a sub name is tolerated as well.
    my $escape = sub {
        my ($text) = @_;
        no strict 'refs';
        return &{$self->{html_escape_code_ref}}($text);
    };

    my $str = $args{str};
    $str =~ s/\r\n|\r/\n/g;

    ### make markup map
    $self->{_markup_map} = [];
    my $size = scalar @{$self->{syntax}};
    for my $i (0 .. $size - 1) {
        $self->_make_map(
            str    => $str,
            syntax => $self->{syntax}->[$i],
            index  => $i,
        );
    }

    ### Nothing matched: the text still has to be escaped, otherwise the
    ### output would be escaped only when at least one rule matches.
    if (! scalar @{$self->{_markup_map}}) {
        return $escape->($str);
    }

    my @markup_array = $self->_restracture_map();
    my $outstr   = '';
    my $last_pos = 0;

    ### Apply the map to string
    foreach my $pos (@markup_array) {
        $outstr .= $escape->(substr($str, $last_pos, $pos->[0] - $last_pos));
        ### Markers carrying a syntax ref open a span, the others close one
        $outstr .= defined $pos->[1]
            ? sprintf("<span class='%s'>", $pos->[1]->{class})
            : '</span>';
        $last_pos = $pos->[0];
    }
    $outstr .= $escape->(substr($str, $last_pos));
    return $outstr;
}
### ----------------------------------------------------------------------------
### Make markup map
### ---------------------------------------------
### | open_pos | close_pos | syntax_ref | index
### | open_pos | close_pos | syntax_ref | index
### | open_pos | close_pos | syntax_ref | index
### ---------------------------------------------
### ----------------------------------------------------------------------------
### Record every match of one rule in $self->{_markup_map}.
###
### Named arguments:
###   str    - text to scan.
###   pos    - offset added to every recorded position (default 0).
###   syntax - rule hash ref; its regexp entry is the pattern.
###   index  - position of the rule in the rule set (used as sort key).
### Each match is stored as [open_pos, close_pos, syntax_ref, index]. Once
### no further match is found, a non-empty map is sorted by open position
### ascending, then close position descending, then rule index ascending.
### Returns nothing.
sub _make_map {
    no warnings; ### the whole routine runs with warnings off, as before
    my ($self, @pairs) = @_;
    my %opt = (str => '', pos => 0, syntax => '', @pairs);
    my $map = $self->{_markup_map};
    my ($rule, $text, $offset) = @opt{qw(syntax str pos)};

    while (1) {
        ### Limit 2: everything before the first match, then the remainder
        my @pieces = split(/$rule->{regexp}/, $text, 2);
        last if scalar(@pieces) < 2;

        ### The remainder is always last, even when the pattern captures
        my $tail  = pop @pieces;
        my $open  = $offset + length($pieces[0]);
        my $close = $offset + (length($text) - length($tail));
        push @$map, [$open, $close, $rule, $opt{index}];

        ### Carry on with the text after this match
        ($text, $offset) = ($tail, $close);
    }

    ### Follow up process
    if (@$map) {
        @$map = sort {
            $a->[0] <=> $b->[0]
                or $b->[1] <=> $a->[1]
                or $a->[3] <=> $b->[3]
        } @$map;
    }
    return;
}
### ----------------------------------------------------------------------------
### restructure the map data into the following format
### --------------------
### | open_pos | class
### | close_pos |
### | open_pos | class
### | close_pos |
### --------------------
### ----------------------------------------------------------------------------
### Turn $self->{_markup_map} into a flat list of markers:
###   [open_pos, syntax_ref]   opening marker
###   [close_pos]              closing marker
### Entries rejected by the container checks below are spliced out of the
### map itself. Returns the marker list sorted by position.
sub _restracture_map {
my $self = shift;
my $map_ref = $self->{_markup_map};
my @out_array;
### Largest close position among the entries kept so far
my $_max_close_pos = 0;
### Kept, class-bearing entries that may still contain later entries
my @root = ();
REGLOOP: for (my $i = 0; $i < scalar @$map_ref; $i++) {
### Container setting of this entry's rule (string, array ref or false)
my $allowed = $$map_ref[$i]->[2]->{container};
my $ok = 1;
### entry without allow-array never can be a daughter
### entry with allow-array must have mother at least
if ((! $allowed and $_max_close_pos >= $$map_ref[$i]->[1])
or ($allowed and $_max_close_pos < $$map_ref[$i]->[1])) {
$ok = 0;
}
else {
### Search for container
### Walks the candidates from the newest one; the start index is one
### past the last element, which the defined-check below skips.
BACKWARD: for (my $j = scalar @root; $j >= 0; $j--) {
defined $root[$j] or next BACKWARD;
### Only the first candidate that closes at or after this entry's open
### position is examined; it decides the outcome.
if ($root[$j]->[1] >= $$map_ref[$i]->[0]) {
### Candidate also closes at or after this entry's close position and
### the rule names containers: keep the entry when the candidate's class
### is one of them.
if ($root[$j]->[1] >= $$map_ref[$i]->[1] and $allowed) {
if (ref $allowed eq 'ARRAY') {
foreach my $class (@$allowed) {
if ($class eq $root[$j]->[2]->{class}) {
last BACKWARD;
}
}
}
elsif ($allowed eq $root[$j]->[2]->{class}) {
last BACKWARD;
}
}
### Any other case: reject the entry
$ok = 0;
last BACKWARD;
}
}
}
### Rejected entries are removed; $i-- makes the loop revisit the slot
### that now holds the next entry.
if (! $ok) {
splice(@$map_ref, $i--, 1);
next REGLOOP;
}
if ($_max_close_pos < $$map_ref[$i]->[1]) {
$_max_close_pos = $$map_ref[$i]->[1];
}
### no-class records won't be marked up
### but being evaluated for Embracement control of others
if (! $$map_ref[$i]->[2]->{class}) {
next REGLOOP;
}
### Set container candidate
### Candidates that close at or before this entry's open position are
### dropped first.
for (my $j = 0; $j < scalar @root; $j++) {
if ($root[$j]->[1] <= $$map_ref[$i]->[0]) {
splice(@root, $j--, 1);
}
}
push(@root, $$map_ref[$i]);
### One opening marker (position and rule) and one closing marker
push(
@out_array,
[$$map_ref[$i]->[0], $$map_ref[$i]->[2]],
[$$map_ref[$i]->[1]]
);
}
return sort {$$a[0] <=> $$b[0]} @out_array;
}
### ----------------------------------------------------------------------------
### Return map for debug
### ----------------------------------------------------------------------------
### Debug hook. The statement that would hand back the markup map is
### commented out, so the call yields nothing.
sub _ret_map {
    #return shift->{_markup_map};
    return;
}
### ----------------------------------------------------------------------------
### replace tabs to spaces
### ----------------------------------------------------------------------------
### Expand tabs to spaces. Plain function, not a method.
###
### Named arguments:
###   str       - text to convert (default '').
###   tab_width - distance between tab stops (default 4).
### Each tab is replaced by as many spaces as are needed to reach the next
### tab stop, counted from the end of the previous expansion.
### Returns the converted text; text without a tab is returned as given.
sub _tab2space {
    no warnings; ### the whole routine runs with warnings off, as before
    my %opt   = (str => '', tab_width => 4, @_);
    my $width = $opt{tab_width};
    my $rest  = $opt{str};
    my $expanded;

    while (1) {
        ### Limit 2: text before the first tab, then everything after it
        my @halves = split(/\t/, $rest, 2);
        last if scalar(@halves) != 2;
        my $pad = $width - (length($halves[0]) % $width);
        $expanded .= $halves[0]. ' ' x $pad;
        $rest = $halves[1];
    }
    return defined $expanded ? $expanded. $rest : $rest;
}
### ----------------------------------------------------------------------------
### convert array to regexp
### ----------------------------------------------------------------------------
### Build a pattern string matching any of the given words as whole words.
### Arguments after the invocant: the words, joined with '|' unchanged.
### Returns the pattern as a string, with \b on both sides of the group.
sub array2regexp {
    my ($self, @words) = @_;
    my $alternation = join('|', @words);
    return '\b(?:'. $alternation. ')\b';
}
### ----------------------------------------------------------------------------
### get class names
### ----------------------------------------------------------------------------
### List the class entry of every rule in $self->{syntax}, in rule order.
sub getClassNames {
    my ($self) = @_;
    return map { $_->{class} } @{ $self->{syntax} };
}
### ----------------------------------------------------------------------------
### HTML escape
### ----------------------------------------------------------------------------
### Default HTML escape routine.
### Replaces '&', '<' and '>' in the given string with their entities and
### returns the result; the caller's string is left untouched.
sub _html_escape {
    my ($str) = @_;
    ### '&' goes first so the ampersands of the entities added below are
    ### not escaped a second time.
    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    return $str;
}
1; # Magic true value required at end of module
__END__
=head1 NAME
Syntax::Highlight::Engine::Simple - Simple Syntax Highlight Engine
=head1 VERSION
This document describes Syntax::Highlight::Engine::Simple version 0.05
=head1 SYNOPSIS
use Syntax::Highlight::Engine::Simple;
# Constructor
$highlight = Syntax::Highlight::Engine::Simple->new(%hash);
# Parameter configuration
$highlight->setParams(%hash);
# Syntax definition and addition
$highlight->setSyntax(%hash);
$highlight->appendSyntax(%hash);
# Parse
$highlight->doFile(%hash);
$highlight->doStr(%hash);
# Utilities
$highlight->array2regexp(@array);
$highlight->getClassNames();
=head1 DESCRIPTION
This is a Syntax highlight Engine.
Advantages are as follows.
=over
=item Simple
Provides you a simple interface for syntax definition by packing the
complicated part of rules into regular expression.
=item Fast
This works much Faster than Text::VimColor or Syntax::Highlight::Engine::Kate.
=item Pure Perl
=back
Here is a working example of This module.
http://jamadam.com/dev/cpan/demo/Syntax/Highlight/Engine/Simple/
=head1 INTERFACE
=head2 new
=over
=item type
File type. This argument causes specific sub class to be loaded.
=item syntax
With this argument, you can assign rules in the constructor.
=back
=head2 setParams
=over
=item html_escape_code_ref
HTML escape code ref. The default subroutine escapes 3 characters: '&', '<' and '>'.
=back
=head2 setSyntax
Set the rules for highlighting. It takes one argument, I<syntax>, given as an array reference.
=over
$highlighter->setSyntax(
syntax => [
{
class => 'tag',
regexp => "<.+?>",
},
{
class => 'quote',
regexp => "'.*?'",
container => 'tag',
},
{
class => 'wquote',
regexp => '".*?"',
container => 'tag',
},
{
class => 'keyword',
regexp => 'somekeyword',
container => ['tag', 'quote', 'wquote'],
},
]
);
=back
The array contains rules, each given as a hash with 3 keys: I<class>,
I<regexp> and I<container>.
=over
=item class
This becomes the class attribute of the output SPAN tag.
=item regexp
Regular expression to be highlighted.
=item container
Class names of the allowed containers, given as a string or an array
reference. This restricts the I<regexp> to match only inside the listed
classes. By default, a highlighting rule is never applied inside another
highlighted span; this parameter lifts that restriction for the listed
containers.
=back
=head2 appendSyntax
Append syntax by giving a hash.
=over
$highlighter->appendSyntax(
syntax => {
class => 'quote',
regexp => "'.*?'",
container => 'tag',
}
);
=back
=head2 doStr
Highlighting strings.
$highlighter->doStr(
str => $str,
tab_width => 4
);
=over
=item str
String.
=item tab_width
Tab width for tab-to-space conversion. -1 disables it. -1 is the default.
=back
=head2 doFile
Highlighting files.
$highlighter->doFile(
file => $file,
tab_width => 4,
encode => 'utf8'
);
=over
=item file
File name.
=item tab_width
Tab width for tab-to-space conversion. -1 disables it. -1 is the default.
=item encode
Set the encode of file. utf8 is the default.
=back
=head2 array2regexp
This is a utility method for converting string array to regular expression.
=over
=back
=head2 getClassNames
Returns the class names in array.
=over
=back
=head1 DIAGNOSTICS
=over
=item C<< doStr method got undefined value >>
=item C<< File open failed >>
=back
=head1 CONFIGURATION AND ENVIRONMENT
Syntax::Highlight::Engine::Simple requires no configuration files or
environment variables. Specific language syntax can be defined with
sub classes and loaded in the constructor if you give it the type argument.
=head1 DEPENDENCIES
=over
=item L<Carp>
=item L<UNIVERSAL::require>
=back
=head1 INCOMPATIBILITIES
None reported.
=head1 BUGS AND LIMITATIONS
No bugs have been reported.
Please report any bugs or feature requests to
C<bug-syntax-highlight-engine-simple@rt.cpan.org>, or through the web
interface at L<http://rt.cpan.org>.
=head1 SEE ALSO
=over
=item L
=item L
=back
=head1 AUTHOR
Sugama Keita C<< >>
=head1 LICENCE AND COPYRIGHT
Copyright (c) 2008, Sugama Keita C<< >>. All rights
reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See I<perlartistic>.
=head1 DISCLAIMER OF WARRANTY
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
=cut