package Lisp::Reader; use strict; use vars qw($DEBUG $SYMBOLS_AS_STRINGS $NIL_AS_SYMBOL @EXPORT_OK $VERSION); $VERSION = sprintf("%d.%02d", q$Revision: 1.10 $ =~ /(\d+)\.(\d+)/); use Lisp::Symbol qw(symbol); require Exporter; *import = \&Exporter::import; @EXPORT_OK = qw(lisp_read); sub my_symbol { ($_[0] eq "nil" && !$NIL_AS_SYMBOL) ? undef : ($SYMBOLS_AS_STRINGS ? $_[0] : symbol($_[0])); } sub lisp_read { local($_) = shift; my $one = shift; my $level = shift || 0; my $indent = " " x $level; my @stack; my $form = []; if ($DEBUG) { print "${indent}Parse"; print "-one" if $one; print ": $_\n"; } while (1) { if (/\G\s*;+([^\n]*)/gc) { print "${indent}COMMENT $1\n" if $DEBUG; } elsif (/\G\s*([()\[\]])/gc) { print "${indent}PARA $1\n" if $DEBUG; if ($1 eq "(" or $1 eq "[") { my $prev = $form; push(@stack, $prev); push(@$prev, $form = []); bless $form, "Lisp::Vector" if $1 eq "["; } else { last unless @stack; if (ref($form) eq "ARRAY" && @$form == 0) { # () and nil is supposed to be the same thing $stack[-1][-1] = my_symbol("nil"); } $form = pop(@stack); last if $one && !@stack; } } elsif (/\G\s*( [-+]? # optional sign (?:\d+(\.\d*)? # 0 0. 0.0 | \.\d+) # .0 ([eE][-+]?\d+)? # optional exponent ) (?![^\s()\[\];]) # not followed by plain chars /gcx) { print "${indent}NUMBER $1\n" if $DEBUG; push(@$form, $1+0); last if $one && !@stack; } elsif (/\G\s*\?((?:\\[A-Z]-)*(?:\\\^.|\\[0-7]{1,3}|\\.|.))/sgc) { print "${indent}CHAR $1\n" if $DEBUG; push(@$form, parse_char($1)); last if $one && !@stack; } elsif (/\G\s* \"( # start quote [^\"\\]* # unescaped (?:\\.[^\"\\]*)* # (escaped char + unescaped)* )\"/gcxs) # end quote { my $str = $1; # Unescape $str =~ s/\\\n//g; # escaped newlines disappear $str =~ s/((?:\\[A-Z]-)+.)/chr(parse_char($1,1))/ge; $str =~ s/((?:\\[A-Z]-)*\\(?:\^.|[0-7]{1,3}|.))/ chr(parse_char($1,1))/ge; print "${indent}STRING $str\n" if $DEBUG; push(@$form, $str); last if $one && !@stack; } elsif (/\G\s*\'/gc) { print "${indent}QUOTE\n" if $DEBUG; my $old_pos = pos($_); my($subform, $pos) = lisp_read(substr($_, $old_pos), 1, $level+1); pos($_) = $old_pos + $pos; push(@$form, [my_symbol("quote"), $subform]); last if $one && !@stack; } elsif (/\G\s*\./gc) { print "${indent}DOT\n" if $DEBUG; #XXX Should handle (a b . c) correctly and (a . b c) as error bless $form, "Lisp::Cons"; } elsif (/\G\s*\#/gc) { die qq(invalid-read-syntax: "\#"); } elsif (/\G\s* ( [^\s()\[\];\\]* # unescaped plain chars (?:\\.[^\s()\[\];\\]*)* # (escaped char + unescaped)* )/gcsx && length($1)) { # symbols can have space and parentesis embedded if they are # escaped. my $sym = $1; $sym =~ s/\\(.)/$1/g; # unescape print "${indent}SYMBOL $sym\n" if $DEBUG; push(@$form, my_symbol($sym)); last if $one && !@stack; } elsif (/\G\s*(.)/gc) { print "${indent}? $1\n"; die qq(invalid-read-syntax: "$1"); } else { last; } } if (@stack) { warn "Form terminated early"; # or should we die? $form = $stack[0]; } if ($one) { die "More than one form parsed, this should never happen" if @$form > 1; $form = $form->[0]; } wantarray ? ($form, pos($_)) : $form; } sub parse_char { my($char, $instring) = @_; my $ord = 0; my @mod; while ($char =~ s/^\\([A-Z])-//) { push(@mod, $1); } if (length($char) == 1) { $ord = ord($char); # a plain one } elsif ($char =~ /^\\([0-7]+)$/) { $ord = oct($1); } elsif ($char =~ /^\\\^(.)$/) { $ord = ord(uc($1)) - ord("@"); $ord += 128 if $ord < 0; } elsif ($char eq "\\t") { $ord = ord("\t"); } elsif ($char eq "\\n") { $ord = ord("\n"); } elsif ($char eq "\\a") { $ord = ord("\a"); } elsif ($char eq "\\f") { $ord = ord("\f"); } elsif ($char eq "\\r") { $ord = ord("\r"); } elsif ($char eq "\\e") { $ord = ord("\e"); } elsif ($char =~ /^\\(.)$/) { $ord = ord($1); } else { warn "Don't know how to handle character ($char)"; } for (@mod) { if ($_ eq "C") { $ord = ord(uc(chr($ord))) - ord("@"); $ord += 128 if $ord < 0; } elsif ($_ eq "M") { $ord += $instring ? 2**7 : 2**27; } elsif ($_ eq "H") { $ord += 2**24; } elsif ($_ eq "S") { $ord += 2**23; } elsif ($_ eq "A") { $ord += 2**22; } else { warn "Unknown character modified ($_)"; } } $ord; } 1;