# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
#
# This file is part of Text-VimColor
#
# This software is copyright (c) 2002-2006 by Geoff Richards.
#
# This software is copyright (c) 2011 by Randy Stauner.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
use warnings;
use strict;

package Text::VimColor;
{
  $Text::VimColor::VERSION = '0.23';
}
# git description: v0.22-5-g8b08ddd

BEGIN {
  $Text::VimColor::AUTHORITY = 'cpan:RWSTAUNER';
}
# ABSTRACT: Syntax highlight text using Vim

# True when the running perl is new enough for the encoding support used
# when writing string input (PerlIO::encoding & utf8::is_utf8).
use constant HAVE_ENCODING => ($] >= 5.008001);

use IO::File;
use File::Copy qw( copy );
use File::ShareDir ();
use File::Temp qw( tempfile );
use Path::Class qw( file );
use Carp;
use IPC::Open3 (); # core
use Symbol (); # core

# for backward compatibility
our $SHARED = File::ShareDir::dist_dir('Text-VimColor');

# Default program name and command line options used to run Vim; objects
# fall back to these when 'vim_command' / 'vim_options' are not supplied.
our $VIM_COMMAND = 'vim';
our @VIM_OPTIONS = (qw( -RXZ -i NONE -u NONE -N -n ), "+set nomodeline");

our $NAMESPACE_ID = 'http://ns.laxan.com/text-vimcolor/1';

# Vim variables that are always set (via ':let') unless an object overrides
# them with its own 'vim_let' entries.
our %VIM_LET = (
  perl_include_pod => 1,
  'b:is_bash'      => 1,
);

# The syntax group names that are kept in the parsed markup; any other
# group name is treated as unmarked text.
our %SYNTAX_TYPE = (
  Comment    => 1,
  Constant   => 1,
  Identifier => 1,
  Statement  => 1,
  PreProc    => 1,
  Type       => 1,
  Special    => 1,
  Underlined => 1,
  Error      => 1,
  Todo       => 1,
);

# Default Term::ANSIColor attribute for each syntax group (see 'ansi').
our %ANSI_COLORS = (
  Comment    => 'blue',
  Constant   => 'red',
  Identifier => 'cyan',
  Statement  => 'yellow',
  PreProc    => 'magenta',
  Type       => 'green',
  Special    => 'bright_magenta',
  Underlined => 'underline',
  Error      => 'on_red',
  Todo       => 'on_cyan',
);

# Set to true to print the command line used to run Vim.
our $DEBUG = $ENV{TEXT_VIMCOLOR_DEBUG};

# Constructor.  Takes a list of key/value options which are laid over the
# defaults below.  If a 'file' or 'string' option is given the markup is
# generated immediately.  Croaks when both 'file' and 'string' are supplied.
sub new {
  my $class = shift;
  my $self = {
    extra_vim_options      => [],
    html_inline_stylesheet => 1,
    xml_root_element       => 1,
    vim_let                => {},
    @_,
  };

  $self->{vim_command} = $VIM_COMMAND
    unless defined $self->{vim_command};

  # NOTE: this should be [ @VIM_OPTIONS ] but \@VIM_OPTIONS is backward-compatible
  $self->{vim_options} = \@VIM_OPTIONS
    unless defined $self->{vim_options};

  # always include these (back-compat)
  $self->{vim_let} = { %VIM_LET, %{ $self->{vim_let} } };

  croak "only one of the 'file' or 'string' options should be used"
    if defined $self->{file} && defined $self->{string};

  bless $self, $class;

  # run automatically if given a source
  $self->_do_markup
    if defined $self->{file} || defined $self->{string};

  return $self;
}

# Return the path of a file in this distribution's shared directory.
# Arguments are passed straight through to File::ShareDir::dist_file.
sub dist_file {
  my $self = shift;
  return File::ShareDir::dist_file('Text-VimColor', @_);
}

# Add or replace entries in the object's 'vim_let' hash from a list of
# name/value pairs.  Returns the object so calls can be chained.
sub vim_let {
  my ($self, %option) = @_;

  while (my ($name, $value) = each %option) {
    $self->{vim_let}->{$name} = $value;
  }

  return $self;
}

# Mark up the given file.  A 'filetype' option overrides the object's
# filetype for this call only; 'file' and 'filetype' are localized, so the
# object's own values are restored afterwards.  Returns the object.
sub syntax_mark_file {
  my ($self, $file, %options) = @_;

  local $self->{filetype} = exists $options{filetype}
    ? $options{filetype}
    : $self->{filetype};
  local $self->{file} = $file;

  $self->_do_markup;

  return $self;
}

# Mark up the given string; otherwise identical to syntax_mark_file.
sub syntax_mark_string {
  my ($self, $string, %options) = @_;

  local $self->{filetype} = exists $options{filetype}
    ? $options{filetype}
    : $self->{filetype};
  local $self->{string} = $string;

  $self->_do_markup;

  return $self;
}

# Return the marked-up text as a string with ANSI escape sequences, using
# %ANSI_COLORS optionally overridden by $ENV{TEXT_VIMCOLOR_ANSI}
# (a list of "Type=color" pairs separated by ';').
sub ansi {
  my ($self) = @_;
  my $syntax = $self->marked;

  require Term::ANSIColor;

  # allow the environment to overwrite:
  my %colors = (
    %ANSI_COLORS,
    $ENV{TEXT_VIMCOLOR_ANSI}
      ? split(/\s*[=;]\s*/, $ENV{TEXT_VIMCOLOR_ANSI})
      : ()
  );

  local $_;

  # Term::ANSIColor didn't support bright values until version 3
  # Handle this here to cover custom colors and not require T::AC until needed
  if( Term::ANSIColor->VERSION < 3 ){
    s/bright_// for values %colors;
  }

  # compared to join/map or foreach/my this benched as the fastest:
  my $ansi = '';
  for ( @$syntax ){
    $ansi .= $_->[0] eq ''
      ? $_->[1]
      : Term::ANSIColor::colored([ $colors{ $_->[0] } ], $_->[1]);
  }

  return $ansi;
}

# Return the marked-up text as HTML: unmarked text is escaped as-is and each
# marked chunk is wrapped in a <span> whose class is "syn" plus the type.
# With 'html_full_page' the fragment is wrapped in a complete page.
sub html {
  my ($self) = @_;
  my $syntax = $self->marked;

  my $html = '';
  $html .= $self->_html_header
    if $self->{html_full_page};

  foreach my $chunk (@$syntax) {
    my ($type, $text) = @$chunk;

    if ($type eq '') {
      $html .= _xml_escape($text);
      next;
    }

    $html .= "<span class=\"syn$type\">"
           . _xml_escape($text)
           . '</span>';
  }

  # Closes the elements opened by _html_header.
  $html .= "</pre>\n\n </body>\n</html>\n"
    if $self->{html_full_page};

  return $html;
}

# Return the marked-up text as XML: each marked chunk becomes an element
# named after its type in the 'syn' namespace.  With 'xml_root_element'
# (on by default) everything is wrapped in a root element that declares the
# namespace and, when known, carries the input filename.
# NOTE(review): the root element markup was reconstructed after the literal
# text was lost from this file -- confirm the element and attribute names
# against the released module.
sub xml {
  my ($self) = @_;
  my $syntax = $self->marked;

  my $xml = '';
  if ($self->{xml_root_element}) {
    my $filename = $self->input_filename;
    $xml .= "<syn:syntax xmlns:syn=\"$NAMESPACE_ID\"";
    $xml .= ' filename="' . _xml_escape($filename) . '"'
      if defined $filename;
    $xml .= '>';
  }

  foreach my $chunk (@$syntax) {
    my ($type, $text) = @$chunk;

    if ($type eq '') {
      $xml .= _xml_escape($text);
      next;
    }

    $xml .= "<syn:$type>"
          . _xml_escape($text)
          . "</syn:$type>";
  }

  $xml .= "</syn:syntax>\n"
    if $self->{xml_root_element};

  return $xml;
}

# Return the parsed markup: an array ref of [ $type, $text ] pairs.
# Croaks if no input has been processed yet.
sub marked {
  my ($self) = @_;

  exists $self->{syntax}
    or croak "an input file or string must be specified, either to 'new' or".
             " 'syntax_mark_file/string'";

  return $self->{syntax};
}

# Return the input filename, or nothing when the input was not a plain
# filename (for example a file handle, or a string).
sub input_filename {
  my ($self) = @_;

  my $file = $self->{file};
  return $file if defined $file && !ref $file;

  return;
}

# Return a string consisting of the start of an XHTML file, with a stylesheet
# either included inline or referenced with a <link>.
# NOTE(review): the markup literals and the body of the inline-stylesheet
# branch were reconstructed after being lost from this file -- confirm the
# 'html_stylesheet' / 'html_stylesheet_file' handling against the released
# module.
sub _html_header {
  my ($self) = @_;

  my $input_filename = $self->input_filename;
  my $title = defined $self->{html_title} ? _xml_escape($self->{html_title})
            : defined $input_filename     ? _xml_escape($input_filename)
            :                               '[untitled]';

  my $stylesheet;
  if ($self->{html_inline_stylesheet}) {
    $stylesheet = "<style>\n";
    if ($self->{html_stylesheet}) {
      $stylesheet .= _xml_escape($self->{html_stylesheet});
    }
    else {
      my $file = $self->{html_stylesheet_file};
      $file = $self->dist_file('light.css')
        unless defined $file;

      # A plain filename is opened here; anything else is read as a handle.
      unless (ref $file) {
        my $path = $file;
        $file = IO::File->new($path, 'r')
          or croak "error reading stylesheet '$path': $!";
      }

      local $/;
      my $css = <$file>;
      $stylesheet .= _xml_escape($css);
    }
    $stylesheet .= "</style>\n";
  }
  else {
    $stylesheet =
      "<link rel=\"stylesheet\" type=\"text/css\" href=\"" .
      _xml_escape($self->{html_stylesheet_url} ||
        "file://${\ file($self->dist_file('light.css'))->as_foreign('Unix') }") .
      "\" />\n";
  }

  return
    "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" .
    " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" .
    "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\n" .
    " <head>\n" .
    "  <title>$title</title>\n" .
    "  $stylesheet" .
    " </head>\n" .
    " <body>\n\n" .
    "<pre>";
}

# Return a string safe to put in XML text or attribute values.  It doesn't
# escape single quotes (') because we don't use those to quote
# attribute values.
#
# Takes the string to escape and returns an escaped copy; the argument
# itself is not modified.
sub _xml_escape
{
   my ($s) = @_;

   # The ampersand has to be handled first, otherwise the ampersands that
   # introduce the other entities would themselves be escaped again.
   $s =~ s/&/&amp;/g;
   $s =~ s/</&lt;/g;
   $s =~ s/>/&gt;/g;
   $s =~ s/"/&quot;/g;

   return $s;
}

# Actually run Vim and turn the script's output into a datastructure.
#
# The input is taken from $self->{file} (a filename, or an open handle which
# is copied to a temporary file) or from $self->{string} (a string or a
# reference to one, written to a temporary file).  A temporary Vim script is
# written which sources the 'mark.vim' syntax script and writes the marked
# text to a temporary output file; that output is then parsed into
# $self->{syntax}, an array ref of [ $type, $text ] pairs.
#
# Croaks if the syntax script or the input file is missing or unreadable,
# if the input cannot be copied to the temporary file, or if running Vim
# fails (see _run).
sub _do_markup
{
   my ($self) = @_;
   my $vim_syntax_script = $self->dist_file('mark.vim');

   croak "Text::VimColor syntax script '$vim_syntax_script' not installed"
      unless -f $vim_syntax_script && -r $vim_syntax_script;

   if ($DEBUG) {
      print STDERR __PACKAGE__."::_do_markup: script: $vim_syntax_script\n";
   }

   my $encoding = $self->{encoding} || '';
   my $binmode = ':raw';

   my $filename = $self->{file};
   my $input_is_temporary = 0;

   if (ref $self->{file}) {
      # Input is a file handle: copy its contents byte-for-byte to a
      # temporary file that Vim can open by name.
      my $fh;
      ($fh, $filename) = tempfile();
      $input_is_temporary = 1;

      binmode $self->{file};
      binmode $fh;
      copy($self->{file}, $fh)
         or croak "error copying input to temporary file '$filename': $!";
      close $fh;
   }
   elsif (exists $self->{string}) {
      my $fh;
      ($fh, $filename) = tempfile();
      $input_is_temporary = 1;

      my $string = (ref $self->{string} ? ${ $self->{string} } : $self->{string});

      # A character string is written through an encoding layer (UTF-8
      # unless the 'encoding' option says otherwise); the same layer is
      # applied further down when reading Vim's output back in.
      if( HAVE_ENCODING ){
        if( utf8::is_utf8($string) ){
          $encoding ||= 'UTF-8';
          $binmode = ":encoding($encoding)"
            if $encoding;
        }
      }

      binmode $fh, $binmode;
      print $fh $string;
      # Make sure everything is on disk before Vim reads the file.
      close $fh;
   }
   else {
      croak "input file '$filename' not found"
         unless -f $filename;
      croak "input file '$filename' not accessible"
         unless -r $filename;
   }

   # Create a temp file to put the output in.
   my ($out_fh, $out_filename) = tempfile();

   # Create a temp file for the 'script', which is given to vim
   # with the -s option.  This is necessary because it tells Vim not
   # to delay for 2 seconds after displaying a message.
   my ($script_fh, $script_filename) = tempfile();
   my $filetype = $self->{filetype};
   my $filetype_set = defined $filetype ? ":set filetype=$filetype" : '';
   my $vim_let = $self->{vim_let};

   # on linux '-s' is fast and '--cmd' adds the 2-second startup delay
   # are there situations where --cmd is necessary or useful?
   # XXX: for debugging, may be removed in the future
   my $use_cmd_opt = $ENV{TEXT_VIMCOLOR_CMD_OPT};

   # Specify filename as argument to command (rather than using :edit in script).
   # If using --cmd then the filename needs to be in the script.
   # For some reason windows doesn't seem to like the filename being in the arg list.
   # Are there other times that this is needed?
   my $file_as_arg = ($use_cmd_opt || $^O ne 'MSWin32');

   my @script_lines = (
     map { "$_\n" }
       # The default 'encoding' comes from env so set it explicitly to avoid
       # conversion from 'encoding' to 'fileencoding'.
       # Set 'fileencodings' so vim doesn't try to choose.
       # Set 'nomodified' after 'fenc' so vim doesn't prompt for unsaved changes.
       ($encoding ?
         ":set encoding=$encoding fileencodings=$encoding fileencoding=$encoding nomodified"
         : ()),

       # do :edit before :let or the buffer variables may get reset
       (!$file_as_arg ? ":edit $filename" : ()),

       (
         map  { ":let $_=$vim_let->{$_}" }
         grep { defined  $vim_let->{$_} }
           keys %$vim_let
       ),

       ':filetype on',
        $filetype_set,
       ":source $vim_syntax_script",
       ":write! $out_filename",
       ':qall!',
   );

   print STDERR map { __PACKAGE__ . " | $_" } @script_lines if $DEBUG;

   print $script_fh @script_lines;
   close $script_fh;

   # TODO: it seems we may need to localize and delete $ENV{LANG}
   # to enable encodings other than utf-8 to work.
   $self->_run(
      $self->{vim_command},
      $self->vim_options,
      ($file_as_arg ? $filename : ()),
      (
        $use_cmd_opt
          ? ( '--cmd' => "silent! so $script_filename" )
          : ( '-s'    => $script_filename )
      ),
   );

   unlink $filename
      if $input_is_temporary;
   unlink $out_filename;
   unlink $script_filename;

   binmode $out_fh, $binmode;

   my $data = do { local $/; <$out_fh> };
   close $out_fh;

   # Convert line endings to ones appropriate for the current platform.
   $data =~ s/\x0D\x0A?/\n/g;

   # The output is a sequence of marked chunks of the form
   # ">Type>text<Type<" interleaved with unmarked text that contains no
   # angle brackets.  \G with /c walks through the data chunk by chunk and
   # the loop ends at the first position where neither form matches.
   my $syntax = [];
   LOOP: {
      _add_markup($syntax, $1, $2), redo LOOP
         if $data =~ /\G>(.*?)>(.*?)<\1</cgs;
      _add_markup($syntax, '', $1), redo LOOP
         if $data =~ /\G([^<>]+)/cgs;
   }

   $self->{syntax} = $syntax;
}

# Given an array ref ($syntax), we add a new syntax chunk to it, unescaping
# the text and making sure that consecutive chunks of the same type are
# merged.
#
# $type is the syntax group name ('' for unmarked text) and $text is the
# chunk's text with '<', '>' and '&' escaped as '&l', '&g' and '&a'.
sub _add_markup
{
   my ($syntax, $type, $text) = @_;

   # TODO: make this optional
   # (https://github.com/petdance/vim-perl/blob/master/t/01_highlighting.t#L12)

   # Ignore types we don't know about.  At least one syntax file (xml.vim)
   # can produce these.  It happens when a syntax type isn't 'linked' to
   # one of the predefined types.
   $type = ''
      unless exists $SYNTAX_TYPE{$type};

   # Unescape ampersands and pointies.  This is done in a single pass so
   # that the '&' produced by unescaping '&a' can never be mistaken for
   # the start of another escape sequence.
   my %unescaped = ( l => '<', g => '>', a => '&' );
   $text =~ s/&([lga])/$unescaped{$1}/g;

   if (@$syntax && $syntax->[-1][0] eq $type) {
      # Concatenate consecutive bits of the same type.
      $syntax->[-1][1] .= $text;
   }
   else {
      # A new chunk of marked-up text.
      push @$syntax, [ $type, $text ];
   }
}

# This is a private internal method which runs a program.
# It takes a list of the program name and arguments.
#
# The program's standard input and output are closed straight away and its
# standard error is captured.  Croaks (including any captured error output
# in the message) if the program exits with a non-zero status or is
# terminated by a signal, or if waiting for it fails.
sub _run
{
   my ($self, $prog, @args) = @_;

   if ($DEBUG) {
      print STDERR __PACKAGE__."::_run: $prog " .
            join(' ', map { "'$_'" } @args) . "\n";
   }

   my ($in, $out, $err_fh) = map { Symbol::gensym() } 1 .. 3;

   my $pid = IPC::Open3::open3($in, $out, $err_fh, $prog => @args);

   # close these to avoid any ambiguity that might cause this to block
   # (see also the paragraph about "select" in IPC::Open3)
   close($in);
   close($out);

   # read handle before waitpid to avoid hanging on older systems
   my $errout = do { local $/; <$err_fh> };
   $errout = '' unless defined $errout;
   close($err_fh);

   my $gotpid = waitpid($pid, 0);
   croak "couldn't run the program '$prog'" if $gotpid == -1;

   # $? holds the exit code in the high byte and the number of the signal
   # that killed the process (if any) in the low seven bits.  Looking only
   # at the exit code would report a killed process as a success.
   my $status = $?;
   my $signal = $status & 127;
   my $error  = $status >> 8;

   if ($signal || $error) {
      $errout =~ s/\n+\z//;
      my $details = $errout eq '' ? '' :
                    "\nVim wrote this error output:\n$errout\n";
      croak "$prog was terminated by signal $signal$details"
         if $signal;
      croak "$prog returned an error code of '$error'$details";
   }

   return;
}

# Return the full list of command line options for Vim: the object's base
# 'vim_options' followed by its 'extra_vim_options'.
sub vim_options {
  my $self = shift;
  my ($base, $extra) = @{$self}{qw( vim_options extra_vim_options )};
  return ( @$base, @$extra );
}

1;

__END__

=pod

=encoding utf-8

=for :stopwords Geoff Richards Randy Stauner ACKNOWLEDGEMENTS ansi html xml DOCTYPE XHTML
XSL XSLT XSL-FO pdf inline stylesheet filetype unencoded PreProc Todo TODO
syntaxes Moolenaar cpan testmatrix url annocpan anno bugtracker rt cpants
kwalitee diff irc mailto metadata placeholders metacpan

=head1 NAME

Text::VimColor - Syntax highlight text using Vim

=head1 VERSION

version 0.23

=head1 SYNOPSIS

   use Text::VimColor;
   my $syntax = Text::VimColor->new(
      file => $0,
      filetype => 'perl',
   );

   print $syntax->html;
   print $syntax->xml;
   print $syntax->ansi;

=head1 DESCRIPTION

This module tries to markup text files according to their syntax.  It can
be used to produce web pages with pretty-printed colorful source code
samples.  It can produce output in the following formats:

=over 4

=item HTML

Valid XHTML 1.0, with the exact coloring and style left to a CSS stylesheet

=item XML

Pieces of text are marked with XML elements in a simple vocabulary,
which can be converted to other formats, for example, using XSLT

=item Perl array

A simple Perl data structure, so that Perl code can be used to turn it
into whatever is needed

=item ANSI Escape Sequences

A string marked with L<Term::ANSIColor>
suitable for printing to a terminal.

=back

This module works by running the Vim text editor and getting it to apply its
excellent syntax highlighting (aka 'font-locking') to an input file, and mark
pieces of text according to whether it thinks they are comments, keywords,
strings, etc.  The Perl code then reads back this markup and converts it
to the desired output format.

This is an object-oriented module.  To use it, create an object with
the L</new> function (as shown in L</SYNOPSIS>) and then call methods
to get the markup out.

=head1 METHODS

=head2 new

  my $tvc = Text::VimColor->new(%options)

Returns a syntax highlighting object.  Pass it a hash of options.

The following options are recognized:

=over 4

=item file

The file to syntax highlight.  Can be either a filename or an open file handle.

Note that using a filename might allow Vim to guess the file type from its
name if none is specified explicitly.

If the file isn't specified while creating the object, it can be given later
in a call to the L</syntax_mark_file> method (see below), allowing a single
C<Text::VimColor> object to be used with multiple input files.

=item string

Use this to pass a string to be used as the input.  This is an alternative
to the C<file> option.  A reference to a string will also work.

The L</syntax_mark_string> method is another way to use a string as input.

If you provide a character (unencoded) string (recommended)
it will be passed to vim encoded in UTF-8
and your result will be a character string.

=item filetype

Specify the type of file Vim should expect, in case Vim's automatic
detection by filename or contents doesn't get it right.  This is
particularly important when providing the file as a string or file
handle, since Vim won't be able to use the file extension to guess
the file type.

The file types recognized by Vim are short strings like 'perl' or 'lisp'.
They are the names of files in the 'syntax' directory in the Vim
distribution.

This option, whether or not it is passed to L</new>, can be overridden
when calling L</syntax_mark_file> and L</syntax_mark_string>, so you can
use the same object to process multiple files of different types.

=item html_full_page

By default the L</html> output method returns a fragment of HTML, not a
full file.  To make useful output this must be wrapped in a C<< <pre> >>
element and a stylesheet must be included from somewhere.  Setting the
L</html_full_page> option will instead make the L</html> method return a
complete stand-alone XHTML file.

Note that while this is useful for testing, most of the time you'll want to
put the syntax highlighted source code in a page with some other content,
in which case the default output of the L</html> method is more appropriate.

=item html_inline_stylesheet

Turned on by default, but has no effect unless L</html_full_page> is also
enabled.

This causes the CSS stylesheet defining the colors to be used
to render the markup to be included in the HTML output, in a
C<<