The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/* ------------------------------------------------------------------------
@NAME       : lex_auxiliary.c
@INPUT      : 
@OUTPUT     : 
@RETURNS    : 
@DESCRIPTION: The code and global variables here have three main purposes:
                - maintain the lexical buffer (zztoktext, which
                  traditionally with PCCTS is a static array; I have
                  changed things so that it's dynamically allocated and
                  resized on overflow)
                - keep track of lexical state that's not handled by PCCTS
                  code (like "where are we in terms of BibTeX entries?" or
                  "what are the delimiters for the current entry/string?")
                - everything called from lexical actions is here, to keep
                  the grammar file itself neat and clean
@GLOBALS    : 
@CALLS      : 
@CALLERS    : 
@CREATED    : Greg Ward, 1996/07/25-28
@MODIFIED   : Jan 1997
              Jun 1997
@VERSION    : $Id: lex_auxiliary.c 697 2003-10-04 17:27:18Z greg $
@COPYRIGHT  : Copyright (c) 1996-99 by Gregory P. Ward.  All rights reserved.

              This file is part of the btparse library.  This library is
              free software; you can redistribute it and/or modify it under
              the terms of the GNU Library General Public License as
              published by the Free Software Foundation; either version 2
              of the License, or (at your option) any later version.
-------------------------------------------------------------------------- */

#include "bt_config.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <assert.h>
#include "lex_auxiliary.h"
#include "stdpccts.h"
#include "error.h"
#include "prototypes.h"
#include "my_dmalloc.h"

/*
 * DUPE_TEXT selects how zzcr_attr() stores token text: when non-zero the
 * text is copied with strdup() (and zzd_attr() is compiled to free it);
 * when zero the attribute simply points at the text it was handed.
 */
#define DUPE_TEXT 0

extern char * InputFilename;            /* from input.c */

/*
 * Generate the two file-private, printf-style reporting functions used
 * throughout this file: lexical_warning() and lexical_error().
 * NOTE(review): GEN_PRIVATE_ERRFUNC is not defined in this file (presumably
 * it comes from error.h); the arguments look like error class, filename,
 * line number, item description, item number and format -- confirm against
 * the macro definition.
 */
GEN_PRIVATE_ERRFUNC (lexical_warning, (char * fmt, ...),
                     BTERR_LEXWARN, InputFilename, zzline, NULL, -1, fmt)
GEN_PRIVATE_ERRFUNC (lexical_error, (char * fmt, ...),
                     BTERR_LEXERR, InputFilename, zzline, NULL, -1, fmt)



/* ----------------------------------------------------------------------
 * Global variables
 */

/*
 * First, the lexical buffer.  This is used elsewhere, so can't be static.
 * It is NULL whenever no buffer is allocated: alloc_lex_buffer() allocates
 * it, realloc_lex_buffer() resizes it, and free_lex_buffer() releases it
 * and resets it to NULL.
 */
char *         zztoktext = NULL;

/* 
 * Now, the lexical state -- first, stuff that arises from scanning 
 * at top-level and the beginnings of entries;
 *   EntryState:
 *     toplevel when we start scanning a file, or when we are in in_entry
 *       mode and see '}' or ')'
 *     after_at when we are in toplevel mode and see an '@'
 *     after_type when we are in after_at mode and see a name (!= 'comment')
 *     in_comment when we are in after_at mode and see a name (== 'comment')
 *     in_entry when we are in after_type mode and see '{' or '('
 *   EntryOpener:
 *     the character ('(' or '{') which opened the entry currently being
 *     scanned (we use this to make sure that the entry opener and closer
 *     match; if not, we issue a warning)
 *   EntryMetatype: (NB. typedef for bt_metatype is in btparse.h)
 *     classifies entries according to the syntax we will use to parse them;
 *     also winds up (after being changed to a bt_nodetype value) in the 
 *     node that roots the entry AST:
 *       comment    - anything between () or {}
 *       preamble   - a single compound value
 *       string     - a list of "name = compound_value" assignments; no key
 *       alias      - a single "name = compound_value" assignment (where
 *                    the compound value in this case is presumably a 
 *                    name, rather than a string -- this is not syntactically
 *                    checked though)
 *       modify,
 *       entry      - a key followed by a list of "name = compound_value" 
 *                    assignments
 *   JunkCount:
 *     the number of non-whitespace, non-'@' characters seen at toplevel
 *     between two entries (used to print out a warning when we hit
 *     the beginning of an entry, to help people catch "old style" implicit
 *     comments)
 */
/* All four are reset together by initialize_lexer_state(). */
static enum { toplevel, after_at, after_type, in_comment, in_entry } 
               EntryState;              /* where we are relative to an entry */
static char    EntryOpener;             /* '(' or '{' */
static bt_metatype
               EntryMetatype;           /* set by name(); read via entry_metatype() */
static int     JunkCount;               /* non-whitespace chars at toplevel */

/*
 * String state -- these are maintained and used by the functions called
 * from actions in the string lexer.
 *   BraceDepth:
 *     brace depth within a string; we can only end the current string
 *     when this is zero
 *   ParenDepth:
 *     parenthesis depth within a string; needed for @comment entries
 *     that are paren-delimited (because the comment in that case is
 *     a paren-delimited string)
 *   StringOpener:
 *     similar to EntryOpener, but stronger than merely warning of token
 *     mismatch -- this determines which character ('"', '}' or, for a
 *     paren-delimited @comment, ')') can actually end the string
 *   StringStart:
 *     line on which current string started; if we detect an apparent
 *     runaway, this is used to report where the runaway started
 *   ApparentRunaway:
 *     flags if we have already detected (and warned) that the current
 *     string appears to be a runaway, so that we don't warn again
 *     (and again and again and again)
 *   QuoteWarned:
 *     flags if we have already warned about seeing a '"' in a string,
 *     because they tend to come in pairs and one warning per string 
 *     is enough
 *
 * (See bibtex.g for an explanation of my runaway string detection heuristic.)
 */
/* The first six assignments in start_string() reset all of these. */
static char    StringOpener = '\0';     /* '{', '"' or '('; '\0' outside a string */
static int     BraceDepth;              /* depth of brace-nesting */
static int     ParenDepth;              /* depth of parenthesis-nesting */
static int     StringStart = -1;        /* start line of current string (-1 if none) */
static int     ApparentRunaway;         /* current string looks like runaway */
static int     QuoteWarned;             /* already warned about " in string? */



/* ----------------------------------------------------------------------
 * Miscellaneous functions:
 *   lex_info()      (handy for debugging)
 *   zzcr_attr()     (called from PCCTS-generated code)
 */

/*
 * lex_info ()
 *
 * Debugging aid: prints the text, token number and token name (looked up
 * in zztokens[]) of the first lookahead token to stdout.  When LL_K is
 * defined, the second lookahead token is printed as well.
 */
void lex_info (void)
{
   printf ("LA(1) = \"%s\" token %d, %s\n", LATEXT(1), LA(1), zztokens[LA(1)]);
#ifdef LL_K
   printf ("LA(2) = \"%s\" token %d, %s\n", LATEXT(2), LA(2), zztokens[LA(2)]);
#endif
}


/*
 * zzcr_attr ()
 *
 * Fills in attribute `a' for a token of type `tok' whose text is `txt'.
 *
 * For STRING tokens the delimiters are stripped in place: the closing
 * delimiter is overwritten with a NUL and the opening one is skipped, so
 * `txt' itself is modified.  The token must be wrapped in {...} or "..."
 * (checked with assert()).
 *
 * The attribute records the (possibly adjusted) text, the token type, the
 * current line (zzline) and the starting column (zzbegcol).  The text is
 * copied with strdup() only when DUPE_TEXT is non-zero; otherwise the
 * attribute points straight into `txt'.
 */
void zzcr_attr (Attrib *a, int tok, char *txt)
{
   if (tok == STRING)
   {
      int   last = (int) strlen (txt) - 1;   /* index of closing delimiter */

      assert ((txt[0] == '{' && txt[last] == '}')
              || (txt[0] == '\"' && txt[last] == '\"'));
      txt[last] = '\0';                 /* chop off the closing delimiter */
      txt += 1;                         /* step over the opening delimiter */
   }

   a->token = tok;
   a->line = zzline;
   a->offset = zzbegcol;
#if DUPE_TEXT
   a->text = strdup (txt);
#else
   a->text = txt;
#endif

#if DEBUG > 1
   dprintf ("zzcr_attr: input txt = %p (%s)\n", txt, txt);
   dprintf ("           dupe txt  = %p (%s)\n", a->text, a->text);
#endif
}


#if DUPE_TEXT
/*
 * zzd_attr ()
 *
 * Releases the text copy that zzcr_attr() made with strdup().  Only
 * compiled when DUPE_TEXT is non-zero.
 */
void zzd_attr (Attrib *attr)
{
   char *  copy = attr->text;

   free (copy);
}
#endif


/* ----------------------------------------------------------------------
 * Lexical buffer functions:
 *   alloc_lex_buffer()
 *   realloc_lex_buffer()
 *   free_lex_buffer()
 *   lexer_overflow()
 *   zzcopy()              (only if ZZCOPY_FUNCTION is defined and true)
 */


/*
 * alloc_lex_buffer()
 * 
 * Allocates the lexical buffer with `size' characters, all zeroed, points
 * zzlextext at it, and sets zzbufsize to `size'.
 *
 * Does nothing if the buffer is already allocated.
 *
 * A non-positive `size' or an allocation failure is reported through
 * internal_error(); in either case no global is modified, so the buffer
 * stays unallocated (zztoktext == NULL) rather than being used through a
 * NULL pointer.
 *
 * globals: zztoktext, zzlextext, zzbufsize
 * callers: bt_parse_entry() (in input.c)
 */
void alloc_lex_buffer (int size)
{
   char *  buf;

   if (zztoktext != NULL)               /* already allocated: nothing to do */
      return;

   if (size <= 0)
   {
      internal_error ("attempt to allocate lexical buffer of invalid "
                      "size (%d)", size);
      return;
   }

   /* calloc() hands back zeroed memory, so no separate memset() is needed */
   buf = (char *) calloc ((size_t) size, sizeof (char));
   if (buf == NULL)
   {
      internal_error ("out of memory allocating lexical buffer (%d bytes)",
                      size);
      return;
   }

   zztoktext = buf;
   zzlextext = zztoktext;
   zzbufsize = size;
} /* alloc_lex_buffer() */


/*
 * realloc_lex_buffer()
 * 
 * Reallocates the lexical buffer -- size is changed by `size_increment'
 * characters (which could be negative).  Any newly added space is zeroed.
 * Updates all globals that point to or into the buffer (zzlextext,
 * zzbegexpr, zzendexpr), as well as zztoktext (the buffer itself) and
 * zzbufsize (the buffer size).
 *
 * This is only meant to be called (ultimately) from zzgettok(), part of
 * the DLG code.  (In fact, zzgettok() invokes the ZZCOPY() macro, which
 * calls lexer_overflow() on buffer overflow, which calls
 * realloc_lex_buffer().)  The `lastpos' and `nextpos' arguments
 * correspond, respectively, to a local variable in zzgettok() and a static
 * global in dlgauto.h (hence really in scan.c).  They both point into
 * the lexical buffer, so have to be passed by reference here so that
 * we can update them to point into the newly-reallocated buffer:
 *   *lastpos (if lastpos is non-NULL) is set to the last byte of the
 *            new buffer
 *   *nextpos keeps its offset from the start of the buffer
 *
 * A non-positive resulting size or a failed realloc() is reported through
 * internal_error(); in that case the old buffer and every pointer into it
 * are left exactly as they were.
 * 
 * globals: zztoktext, zzbufsize, zzlextext, zzbegexpr, zzendexpr
 * callers: lexer_overflow()
 */
static void
realloc_lex_buffer (int     size_increment, 
                    unsigned char ** lastpos, 
                    unsigned char ** nextpos)
{
   int     beg, end, next;
   int     new_size;
   char *  new_buf;

   if (zztoktext == NULL)
      internal_error ("attempt to reallocate unallocated lexical buffer");

   /*
    * Record offsets *before* calling realloc(): once the block has moved,
    * the old pointers are invalid and may not even be used in arithmetic.
    */
   beg = (int) (zzbegexpr - zzlextext);
   end = (int) (zzendexpr - zzlextext);
   next = (int) (*nextpos - (unsigned char *) zzlextext);

   new_size = zzbufsize + size_increment;
   if (new_size <= 0)
   {
      internal_error ("attempt to resize lexical buffer to invalid "
                      "size (%d)", new_size);
      return;
   }

   /* keep the old pointer until we know realloc() succeeded */
   new_buf = (char *) realloc (zztoktext, (size_t) new_size);
   if (new_buf == NULL)
   {
      internal_error ("out of memory resizing lexical buffer to %d bytes",
                      new_size);
      return;
   }
   zztoktext = new_buf;

   /* only a growing buffer has fresh space to clear */
   if (size_increment > 0)
      memset (zztoktext+zzbufsize, 0, (size_t) size_increment);
   zzbufsize = new_size;
   zzlextext = zztoktext;

   if (lastpos != NULL)
      *lastpos = (unsigned char *) zzlextext + zzbufsize - 1;
   zzbegexpr = zzlextext + beg;
   zzendexpr = zzlextext + end;
   *nextpos = (unsigned char *) zzlextext + next;
   
} /* realloc_lex_buffer() */


/*
 * free_lex_buffer()
 *
 * Releases the lexical buffer and marks it unallocated by resetting
 * zztoktext to NULL.  Calling this when no buffer is allocated is
 * reported through internal_error().
 */
void free_lex_buffer (void)
{
   char *  doomed = zztoktext;

   if (doomed == NULL)
   {
      internal_error ("attempt to free unallocated (or already freed) " 
                      "lexical buffer");
   }

   zztoktext = NULL;
   free (doomed);
} /* free_lex_buffer() */


/*
 * lexer_overflow()
 *
 * Issues a notification and calls realloc_lex_buffer() to increase the size
 * of the lexical buffer by ZZLEXBUFSIZE (a constant -- hence the buffer
 * size increases linearly, not exponentially).  `lastpos' and `nextpos'
 * are passed straight through so that they can be repointed into the
 * resized buffer.
 *
 * Also prints a few lines of debugging information if DEBUG is true: the
 * current column/line, the first and last (up to) 15 bytes of the buffer,
 * the last byte, the current character and the offset of zzbegexpr.  The
 * tail is located using the *current* buffer size (zzbufsize), so it is
 * still the real tail after the buffer has already grown once.
 */ 
void lexer_overflow (unsigned char **lastpos, unsigned char **nextpos)
{
#if DEBUG
   char   head[16], tail[16];
   int    shown = (zzbufsize < 15) ? zzbufsize : 15;
   int    last_char;

   if (shown < 0)
      shown = 0;
   last_char = (zzbufsize > 0) ? zzlextext[zzbufsize-1] : 0;

   printf ("zzcopy: overflow detected\n");
   printf ("        zzbegcol=%d, zzendcol=%d, zzline=%d\n",
           zzbegcol, zzendcol, zzline);
   strncpy (head, zzlextext, shown); head[shown] = 0;
   strncpy (tail, zzlextext+zzbufsize-shown, shown); tail[shown] = 0;
   printf ("        zzlextext=>%s...%s< (last char=%d (%c))\n",
           head, tail, 
           last_char, last_char);
   /* a pointer difference is not an int; convert before printing with %d */
   printf ("        zzchar = %d (%c), zzbegexpr=zzlextext+%d\n",
           zzchar, zzchar, (int) (zzbegexpr-zzlextext));
#endif

   notify ("lexical buffer overflowed (reallocating to %d bytes)",
                    zzbufsize+ZZLEXBUFSIZE);
   realloc_lex_buffer (ZZLEXBUFSIZE, lastpos, nextpos);

} /* lexer_overflow () */


#if ZZCOPY_FUNCTION
/*
 * zzcopy()
 * 
 * Does the same as the ZZCOPY macro (in lex_auxiliary.h), but as a
 * function for easier debugging: if the write position has reached the
 * end of the buffer, grows the buffer via lexer_overflow(); then stores
 * the current character (zzchar) at *nextpos and advances *nextpos.
 *
 * lexer_overflow() works on `unsigned char *' positions, so the two
 * `char *' positions are handed over through correctly-typed temporaries
 * and copied back afterwards, instead of passing pointers of an
 * incompatible type.
 *
 * `ovf_flag' is accepted for interface compatibility but not used.
 */
void zzcopy (char **nextpos, char **lastpos, int *ovf_flag)
{
   (void) ovf_flag;

   if (*nextpos >= *lastpos)
   {
      unsigned char *  last = (unsigned char *) *lastpos;
      unsigned char *  next = (unsigned char *) *nextpos;

      lexer_overflow (&last, &next);
      *lastpos = (char *) last;
      *nextpos = (char *) next;
   }

   **nextpos = (char) zzchar;
   (*nextpos)++;
}
#endif



/* ----------------------------------------------------------------------
 * Report/maintain lexical state 
 *   report_state()        (only meaningful if DEBUG)
 *   initialize_lexer_state()
 *
 * Note that the lexical action functions, below, also fiddle with
 * the lexical state variables an awful lot.
 */

#if DEBUG
/* Printable names for the EntryState values, in enum order. */
char *state_names[] =
   { "toplevel", "after_at", "after_type", "in_comment", "in_entry" };
/*
 * Printable names for entry metatypes.  NOTE(review): presumably indexed
 * by bt_metatype value and used outside this part of the file -- confirm.
 */
char *metatype_names[] = 
   { "unknown", "comment", "preamble", "string", "alias", "modify", "entry" };

/*
 * report_state ()
 *
 * Debugging trace: prints the caller-supplied label `where', the current
 * lexical text, line and starting column, the NLA token value, and the
 * name of the current EntryState to stdout.
 */
static void
report_state (char *where)
{
   printf ("%s: lextext=%s (line %d, offset %d), token=%d, "
           "EntryState=%s\n",
           where, zzlextext, zzline, zzbegcol, NLA,
           state_names[EntryState]);
}
#else
/* Without DEBUG, every report_state() call expands to nothing. */
# define report_state(where)
/*
static void
report_state (char *where) { }
*/
#endif
  
/*
 * initialize_lexer_state ()
 *
 * Puts the lexer back at top level: selects the START lexical mode and
 * resets the entry-tracking state (no entry in progress, no opener
 * recorded, unknown metatype, no junk counted).
 */
void initialize_lexer_state (void)
{
   zzmode (START);

   JunkCount = 0;
   EntryMetatype = BTE_UNKNOWN;
   EntryOpener = '\0';
   EntryState = toplevel;
}


/*
 * entry_metatype ()
 *
 * Returns the metatype most recently recorded for the entry being
 * scanned (BTE_UNKNOWN after initialize_lexer_state()).
 */
bt_metatype entry_metatype (void)
{
   bt_metatype   current = EntryMetatype;

   return current;
}



/* ----------------------------------------------------------------------
 * Lexical actions (START and LEX_ENTRY modes)
 */

/* 
 * newline ()
 * 
 * Lexical action for a newline seen outside of a quoted string: counts
 * the line (zzline) and skips the newline itself via zzskip().
 */
void newline (void)
{
   zzline += 1;
   zzskip();
}


/* 
 * comment ()
 * 
 * Lexical action for a comment: does exactly what newline() does --
 * advances the line counter by one and skips the matched text.
 * NOTE(review): presumably the matched comment always ends with exactly
 * one newline -- confirm against the token definition in bibtex.g.
 */
void comment (void)
{
   zzline += 1;
   zzskip();
}
   

void at_sign (void)
{
   if (EntryState == toplevel)
   {
      EntryState = after_at;
      zzmode (LEX_ENTRY);
      if (JunkCount > 0)
      {
         lexical_warning ("%d characters of junk seen at toplevel", JunkCount);
         JunkCount = 0;
      }
   }
   else
   {
   /* internal_error ("lexer recognized \"@\" at other than top-level"); */
      lexical_warning ("\"@\" in strange place -- should get syntax error");
   }
   report_state ("at_sign");
}


/*
 * toplevel_junk ()
 *
 * Lexical action for junk text at top level: adds the length of the
 * matched text to JunkCount (reported later by at_sign()) and skips it.
 */
void toplevel_junk (void)
{
   JunkCount = JunkCount + strlen (zzlextext);
   zzskip ();
}


void name (void)
{
   report_state ("name (pre)");

   switch (EntryState)
   {
      case toplevel:
      {
         internal_error ("junk at toplevel (\"%s\")", zzlextext); 
         break;
      }
      case after_at: 
      {
         char * etype = zzlextext;
         EntryState = after_type;

         if (strcasecmp (etype, "comment") == 0)
         {
            EntryMetatype = BTE_COMMENT;
            EntryState = in_comment;
         }

         else if (strcasecmp (etype, "preamble") == 0)
            EntryMetatype = BTE_PREAMBLE;

         else if (strcasecmp (etype, "string") == 0)
            EntryMetatype = BTE_MACRODEF;
/*
         else if (strcasecmp (etype, "alias") == 0)
            EntryMetatype = BTE_ALIAS;

         else if (strcasecmp (etype, "modify") == 0)
            EntryMetatype = BTE_MODIFY;
*/
         else
            EntryMetatype = BTE_REGULAR;

         break;
      }
      case after_type:
      case in_comment:
      case in_entry:
         break;                         /* do nothing */
   }

   report_state ("name (post)");

}


/*
 * lbrace ()
 *
 * Lexical action for '{' outside a string.
 *
 * A left brace opens an entry *only* right after '@name' where name is
 * not 'comment' (state after_type); then the state becomes in_entry, the
 * opener is remembered, and the token is turned into ENTRY_OPEN.  Inside
 * an entry or a comment it starts a brace-delimited string.  Anywhere
 * else (e.g. at top level, which can happen in some error situations) it
 * only produces a warning.
 */
void lbrace (void)
{
   switch (EntryState)
   {
      case in_entry:
      case in_comment:
         start_string ('{');
         break;

      case after_type:
         EntryState = in_entry;
         EntryOpener = '{';
         NLA = ENTRY_OPEN;
         break;

      default:
         lexical_warning ("\"{\" in strange place -- should get a syntax error");
         break;
   }

   report_state ("lbrace");
}


void rbrace (void)
{
   if (EntryState == in_entry)
   {
      if (EntryOpener == '(')
         lexical_warning ("entry started with \"(\", but ends with \"}\"");
      NLA = ENTRY_CLOSE;
      initialize_lexer_state ();
   }
   else
   {
      lexical_warning ("\"}\" in strange place -- should get a syntax error");
   }
   report_state ("rbrace");
}


/*
 * lparen ()
 *
 * Lexical action for '(' outside a string.  After '@comment' it starts a
 * paren-delimited string; after any other '@name' it opens the entry
 * (state becomes in_entry and the opener is remembered).  In any other
 * state it only produces a warning.  Unlike lbrace(), this does not touch
 * NLA.
 */
void lparen (void)
{
   switch (EntryState)
   {
      case in_comment:
         start_string ('(');
         break;

      case after_type:
         EntryState = in_entry;
         EntryOpener = '(';
         break;

      default:
         lexical_warning ("\"(\" in strange place -- should get a syntax error");
         break;
   }

   report_state ("lparen");
}


void rparen (void)
{
   if (EntryState == in_entry)
   {
      if (EntryOpener == '{')
         lexical_warning ("entry started with \"{\", but ends with \")\"");
      initialize_lexer_state ();
   }
   else
   {
      lexical_warning ("\")\" in strange place -- should get a syntax error");
   }
   report_state ("rparen");
}


/* ----------------------------------------------------------------------
 * Stuff for processing strings.
 */


/*
 * start_string ()
 *
 * Called when a string opener `start_char' ('{', '"' or '(') is seen.
 * Resets all per-string state, records the opener and the line the string
 * starts on, and counts the opener in the matching depth counter ('{' via
 * open_brace(), '(' via ParenDepth).
 *
 * A '"' while in an @comment is an error: comments must be delimited by
 * braces or parentheses, so the lexer is sent back to top level (START
 * mode) and no string is started.
 *
 * Otherwise warns if a string is not expected in the current state, tells
 * the lexer to continue slurping characters into the same buffer
 * (zzmore), and switches to LEX_STRING mode.
 */
void start_string (char start_char)
{
   /* fresh string: forget everything about the previous one */
   QuoteWarned = 0;
   ApparentRunaway = 0;
   StringStart = zzline;
   ParenDepth = 0;
   BraceDepth = 0;
   StringOpener = start_char;

   switch (start_char)
   {
      case '{':
         open_brace ();
         break;

      case '(':
         ParenDepth++;
         break;

      case '"':
         if (EntryState == in_comment)
         {
            lexical_error ("comment entries must be delimited by either braces or parentheses");
            EntryState = toplevel;
            zzmode (START);
            return;
         }
         break;

      default:
         break;
   }

#ifdef USER_ZZMODE_STACK
   if (zzauto != LEX_ENTRY || EntryState != in_entry)
#else
   if (EntryState != in_entry && EntryState != in_comment)
#endif
   {
      lexical_warning ("start of string seen at weird place");
   }

   zzmore ();
   zzmode (LEX_STRING);
}


/*
 * end_string ()
 *
 * Called when the closing delimiter `end_char' of the current string has
 * been seen: '"' at brace depth 0, '}' bringing the brace depth down to 0,
 * or ')' bringing the paren depth down to 0.  Any other `end_char' is an
 * internal error.
 *
 * Checks (assert) that `end_char' matches the character that opened the
 * string, complains about (and clears) any leftover brace depth, clears
 * the per-string opener/start-line state, and sets the current token to
 * STRING.
 *
 * Then switches lexical mode:
 *   - in an @comment: a paren-delimited comment has its delimiters
 *     rewritten to braces in place, and the lexer returns to top level
 *     (START mode);
 *   - otherwise: back to LEX_ENTRY mode.
 */
void end_string (char end_char)
{
   /*
    * Always initialized, so that the assert() below never reads an
    * indeterminate value should internal_error() return.
    */
   char   match = (char) 0;

   switch (end_char)
   {
      case '}': match = '{'; break;
      case ')': match = '('; break;
      case '"': match = '"'; break;
      default: 
         internal_error ("end_string(): invalid end_char \"%c\"", end_char);
         break;
   }

   assert (StringOpener == match);

   /*
    * If we're at non-zero BraceDepth, that probably means mismatched braces
    * somewhere -- complain about it and reset BraceDepth to minimize future
    * confusion.
    */

   if (BraceDepth > 0)
   {
      lexical_error ("unbalanced braces: too many {'s");
      BraceDepth = 0;
   }

   StringOpener = (char) 0;
   StringStart = -1;
   NLA = STRING;

   if (EntryState == in_comment)
   {
      /* 
       * Convert paren delimiters to the standard brace delimiters.  This
       * relies on the string text itself having balanced braces, which
       * the original author concluded is always the case for a string as
       * parsed by btparse.
       */

      if (zzlextext[0] == '(')
      {
         int   len = strlen (zzlextext);

         zzlextext[    0] = '{';
         zzlextext[len-1] = '}';
      }

      EntryState = toplevel;
      zzmode (START);
   }
   else
   {
      zzmode (LEX_ENTRY);
   }
      
   report_state ("string");
}


/*
 * open_brace ()
 * 
 * Called for every '{' that belongs to a string: either the one that
 * starts it (via start_string()) or one inside it (directly from the
 * lexer).  Goes one brace level deeper and tells the lexer to keep
 * accumulating text.
 */
void open_brace (void)
{
   ++BraceDepth;
   zzmore ();
   report_state ("open_brace");
}


/*
 * close_brace ()
 *
 * Called when we see a '}' inside a string.  Comes up one brace level;
 * if that brings a brace-delimited string back to depth 0, the string is
 * ended (end_string() sets the current token to STRING).
 *
 * Otherwise the lexer is told to keep slurping characters.  A depth that
 * has gone negative means an unmatched right brace (eg. "Hello}" in a
 * quote-delimited string): an error is reported and, to attempt recovery,
 * the depth is reset to zero before continuing.
 */
void close_brace (void)
{
   BraceDepth--;

   if (StringOpener == '{' && BraceDepth == 0)
   {
      end_string ('}');
   }
   else
   {
      if (BraceDepth < 0)
      {
         lexical_error ("unbalanced braces: too many }'s");
         BraceDepth = 0;
      }

      /* just another right brace in the string -- keep eating */
      zzmore ();
   }

   report_state ("close_brace");
}


/*
 * lparen_in_string ()
 *
 * Called when we see a '(' inside a string: goes one parenthesis level
 * deeper and tells the lexer to keep accumulating text.
 */
void lparen_in_string (void)
{
   ++ParenDepth;
   zzmore ();
}


/*
 * rparen_in_string ()
 *
 * Called when we see a ')' inside a string: comes up one parenthesis
 * level.  If the string was opened with '(' and we are back at depth 0,
 * the string is ended; otherwise the lexer keeps accumulating text.
 */
void rparen_in_string (void)
{
   ParenDepth--;

   if (StringOpener != '(' || ParenDepth != 0)
   {
      zzmore ();
      return;
   }

   end_string (')');
}


/* 
 * quote_in_string ()
 * 
 * Called when we see '"' in a string.  If the string was opened with a
 * quote and we are at brace depth 0, this quote ends the string.
 *
 * Otherwise the quote is part of the string and the lexer keeps munching
 * along.  In that case a warning is issued -- at most once per string --
 * when the quote sits at the string's outermost level: depth 0 for
 * quote- or paren-delimited strings, depth 1 for brace-delimited ones.
 * (The warning assumes the text is destined for TeX, where `` or '' is
 * wanted rather than "; it should arguably be optional.)  Any other
 * string opener is an internal error.
 */
void quote_in_string (void)
{
   boolean  outermost;

   if (StringOpener == '"' && BraceDepth == 0)
   {
      end_string ('"');
      return;
   }

   switch (StringOpener)
   {
      case '"':
      case '(':
         outermost = (BraceDepth == 0);
         break;

      case '{':
         outermost = (BraceDepth == 1);
         break;

      default:
         outermost = FALSE;
         internal_error ("Illegal string opener \"%c\"", StringOpener);
         break;
   }

   if (outermost && !QuoteWarned)
   {
      lexical_warning ("found \" at brace-depth zero in string "
                       "(TeX accents in BibTeX should be inside braces)");
      QuoteWarned = 1;
   }

   zzmore ();
}


/*
 * looks_like_entry_or_field ()
 *
 * Helper for check_runaway_string().  `text' is a chunk of `len'
 * characters whose whitespace has already been converted to spaces and
 * whose first character is the (former) newline.  Returns non-zero if,
 * after optional spaces, the chunk looks like the start of
 *   - a new entry:  '@', optional spaces, a name, optional spaces, then
 *                   '{' or '('
 *   - a new field:  a name, optional spaces, then '='
 * A name starts with a letter and continues with letters, digits or any
 * of :+/'.-  (compared case-insensitively).  Returns zero for anything
 * else, including a chunk that ends before the deciding character.
 */
static int
looks_like_entry_or_field (const char *text, int len)
{
   static const char alpha_chars[] = "abcdefghijklmnopqrstuvwxyz";
   static const char name_chars[] = "abcdefghijklmnopqrstuvwxyz0123456789:+/'.-";
   int   i = 1;                         /* position 0 is the newline */
   int   after_at = 0;

   while (i < len && text[i] == ' ') i++;

   if (i < len && text[i] == '@')
   {
      after_at = 1;
      i++;
      while (i < len && text[i] == ' ') i++;
   }

   /* 
    * Never look at or beyond the terminator: strchr() would "find" a NUL
    * in any string, and an empty chunk has no position 1 to read.
    */
   if (i >= len)
      return 0;

   /* <ctype.h> functions need an unsigned char value, not a plain char */
   if (strchr (alpha_chars, tolower ((unsigned char) text[i])) == NULL)
      return 0;                         /* no name seen after WS or @ */

   while (i < len 
          && strchr (name_chars, tolower ((unsigned char) text[i])) != NULL)
      i++;
   while (i < len && text[i] == ' ') i++;

   if (i >= len)
      return 0;

   if (after_at)
      return (text[i] == '{' || text[i] == '(');

   return (text[i] == '=');             /* assume it's a field */
}


/*
 * check_runaway_string ()
 *
 * Called from the lexer whenever we see a newline in a string.  See 
 * bibtex.g for a detailed explanation; basically, this function
 * looks for an entry start ("@name{") or new field ("name=") immediately
 * after a newline (with possible whitespace).  This is a heuristic 
 * check for runaway strings, under the assumption that text that looks
 * like a new entry or new field won't actually occur inside a string
 * very often.
 *
 * On entry, zzlextext contains the whole string so far, starting with its
 * opening delimiter and with earlier newlines/tabs already converted to
 * spaces; zzbegexpr points to the latest chunk of the string, which
 * starts with a newline and has not yet been converted.
 *
 * Effects: counts the newline in zzline (or warns if the chunk does not
 * start with one), converts all whitespace in the chunk to spaces, warns
 * at most once per string if the chunk looks like a runaway, and tells
 * the lexer to keep accumulating text.
 */
void check_runaway_string (void)
{
   int      len;
   int      i;

#if DEBUG > 1
   printf ("check_runaway_string(): zzline=%d\n", zzline);
   printf ("zzlextext=>%s<\nzzbegexpr=>%s<\n", 
           zzlextext, zzbegexpr);
#endif

   /* 
    * increment zzline to take the leading newline into account -- but
    * first a sanity check to be sure that newline is there!
    */

   if (zzbegexpr[0] != '\n')
   {
      lexical_warning ("huh? something's wrong (buffer overflow?) near "
                       "offset %d (line %d)", zzendcol, zzline);
   }
   else
   {
      zzline++;
   }

   /* standardize whitespace (convert all to space) */

   len = strlen (zzbegexpr);
   for (i = 0; i < len; i++)
   {
      /* cast: passing a negative char to isspace() is undefined */
      if (isspace ((unsigned char) zzbegexpr[i]))
         zzbegexpr[i] = ' ';
   }

   /* only warn if we haven't already warned about this string */

   if (!ApparentRunaway && looks_like_entry_or_field (zzbegexpr, len))
   {
      lexical_warning ("possible runaway string started at line %d", 
                       StringStart);
      ApparentRunaway = 1;
   }

   zzmore();
}