aboutsummaryrefslogtreecommitdiff
path: root/utils/Spiff/parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'utils/Spiff/parse.c')
-rw-r--r--utils/Spiff/parse.c802
1 files changed, 802 insertions, 0 deletions
diff --git a/utils/Spiff/parse.c b/utils/Spiff/parse.c
new file mode 100644
index 0000000000..55722b701d
--- /dev/null
+++ b/utils/Spiff/parse.c
@@ -0,0 +1,802 @@
+/* Copyright (c) 1988 Bellcore
+** All Rights Reserved
+** Permission is granted to copy or use this program, EXCEPT that it
+** may not be sold for profit, the copyright notice must be reproduced
+** on copies, and credit should be given to Bellcore where it is due.
+** BELLCORE MAKES NO WARRANTY AND ACCEPTS NO LIABILITY FOR THIS PROGRAM.
+*/
+
+
+#ifndef lint
+static char rcsid[]= "$Header$"; /* revision keyword string (presumably expanded by RCS); left out when lint is defined */
+#endif
+
+#include "misc.h"
+#include "flagdefs.h"
+#include "float.h"
+#include "tol.h"
+#include "token.h"
+#include "line.h"
+#include "command.h"
+#include "comment.h"
+#include "parse.h"
+
+
+#include <ctype.h>
+
+#define _P_PARSE_CHATTER 1000 /* a progress message is issued whenever the token count is a multiple of this */
+
+
+static int _P_realline; /* real-line number of the line being parsed; advanced by _P_nextline */
+static int _P_fnumb; /* number of the file being parsed (set by P_file_parse) */
+
+static char *_P_nextchr; /* pointer to the next character to parse */
+static char *_P_firstchr; /* pointer to the beginning of the line being parsed */
+static int _P_next_tol; /* tolerance index for the next float on this line; reset to 0 for every new line */
+static int _P_stringsize; /* number of characters spanned by the literal
+ being read (maintained by _P_litsnarf) */
+static int _P_has_content; /* flag to indicate if the line being
+ parsed has any tokens on it */
+static int _P_start; /* first line to parse */
+static int _P_lcount; /* number of lines to parse */
+
+static int _P_flags; /* parse-mode flags handed to P_file_parse */
+
+/*
+** by default, "words" can be made up of numbers and letters
+** the following code allows for extending the alphabet that can
+** be used in words. this is useful for handling languages such
+** as C where the underscore character is an allowable character
+** in an identifier. If a character (such as underscore) is NOT added
+** to the alphabet, the identifier will be broken into 2 or more "words"
+** by the parser. as such the two sequences
+** one_two
+** and
+** one _ two
+** would look identical to spiff.
+*/
+#define _P_ALPHALEN 256 /* size of the extended-alphabet buffer, terminator included */
+static char _P_alpha[_P_ALPHALEN]; /* NUL-terminated list of the extra characters allowed in words */
+
+/*
+** empty the extended alphabet by truncating it to a zero-length string
+*/
+static void
+_P_alpha_clear()
+{
+    _P_alpha[0] = '\0';
+}
+
+/*
+** report whether chr belongs to the extended alphabet.
+** returns 1 if chr appears in _P_alpha and 0 if it does not; the
+** string terminator is never treated as a member.
+*/
+static int
+_P_in_alpha(chr)
+char chr;
+{
+    /*
+    ** the lookup routine must be declared with its pointer return
+    ** type: treating the result as an int can truncate a valid
+    ** pointer to 0 on machines where pointers are wider than ints,
+    ** which would make a character that IS in the alphabet look absent.
+    */
+#ifdef ATT
+    extern char *strchr();
+#else
+    extern char *index();
+#endif
+
+    /*
+    ** special case when string terminator
+    ** is handed to us (the lookup would otherwise "find" the
+    ** terminator at the end of _P_alpha)
+    */
+    if ('\0' == chr)
+        return(0);
+
+#ifdef ATT
+    return(strchr(_P_alpha,chr) != (char *) 0);
+#else
+    return(index(_P_alpha,chr) != (char *) 0);
+#endif
+}
+
+/*
+** extend the alphabet that may be used in words.
+** the first word found at ptr is appended to _P_alpha; the program
+** is stopped through Z_fatal if the result would not fit.
+*/
+void
+P_addalpha(ptr)
+char *ptr;
+{
+    char word[Z_LINELEN];
+
+    /*
+    ** copy up to (but not including) the first whitespace char
+    */
+    S_wordcpy(word,ptr);
+
+    /*
+    ** the combined string and its terminator have to fit in _P_alpha
+    */
+    if (strlen(_P_alpha) + strlen(word) >= _P_ALPHALEN)
+    {
+        Z_fatal("too many characters added to extended alphabet");
+    }
+
+    (void) strcat(_P_alpha,word);
+}
+
+/*
+** an always-empty line: a place to aim _P_nextchr when there is no
+** real text to scan
+*/
+static char _P_dummyline[2];
+
+/*
+** put parser in a default state, then apply whatever the
+** execute-time commands ask for
+*/
+static void
+_P_initparser()
+{
+    *_P_dummyline = '\0';
+
+    /*
+    ** wipe the state kept by each of the helper modules
+    */
+    C_clear_cmd();      /* disable embedded command key word */
+    T_clear_tols();
+    W_clearcoms();
+    W_clearlits();
+    _P_alpha_clear();   /* disable extended alphabet */
+
+    /*
+    ** and set state as defined by execute-time commands.
+    */
+    C_docmds();
+}
+
+
+/*
+** return non-zero when the scan pointer rests on a string terminator,
+** i.e. the current line is used up and another one has to be fetched
+*/
+static int
+_P_needmore()
+{
+    return('\0' == *_P_nextchr);
+}
+
+/*
+** close the books on the line just parsed and load the next real line.
+** returns 1 when the requested range of lines is used up, and 0 when a
+** new line has been loaded. in the latter case _P_has_content tells
+** whether the line holds text to parse (1) or was recognized as an
+** embedded command (0).
+*/
+static int
+_P_nextline()
+{
+    /*
+    ** if the line that we just finished had
+    ** some content, increment the count
+    */
+    if (_P_has_content)
+    {
+        L_incclmax(_P_fnumb);
+        /*
+        ** if the previous line had a token
+        ** move on to a fresh (empty) token line
+        */
+        if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb)))
+        {
+            L_inctlmax(_P_fnumb);
+            L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0);
+        }
+        _P_has_content = 0;
+    }
+
+    /*
+    ** tolerances are handed out per line, so start over
+    ** with the first one
+    */
+    _P_next_tol = 0;
+
+    /*
+    ** get another line if there is one available
+    */
+    _P_realline++;
+    if (_P_realline >= _P_start+_P_lcount)
+    {
+        return(1);
+    }
+
+    _P_firstchr = _P_nextchr = L_getrline(_P_fnumb,_P_realline);
+
+    /*
+    ** and look for a command
+    */
+    if (C_is_cmd(_P_firstchr))
+    {
+        /*
+        ** nothing on this line should be tokenized, so aim
+        ** the scan pointer at the empty dummy line
+        */
+        _P_nextchr = _P_dummyline;
+        _P_has_content = 0;
+    }
+    else
+    {
+        /*
+        ** we have a real line, so set up the index
+        ** from the content line to the real line
+        */
+        L_setclindex(_P_fnumb,L_getclmax(_P_fnumb),_P_realline);
+        _P_has_content = 1;
+    }
+    return(0);
+}
+
+/*
+** the following three routines (_P_litsnarf, _P_bolsnarf, and _P_comsnarf)
+** all do roughly the same thing. they scan ahead and collect the
+** specified string, move _P_nextchr to the end of the
+** comment or literal and return 1 if we run off the end of file,
+** 0 otherwise. it would have been nice to have 1 routine handle
+** all three tasks (there is much common code), however there were
+** enough differences (for instance, only comments check for nesting,
+** only literals need to set _P_stringsize, etc.)
+** that I decided to split them up.
+*/
+/*
+** scan over the delimited literal that begins at _P_nextchr.
+** _P_stringsize is set to the number of characters passed over.
+** returns 1 if the input ran out before the literal was closed,
+** 0 otherwise.
+*/
+static int
+_P_litsnarf(litptr)
+W_lit litptr;
+{
+    char *escstr;       /* escape string of the literal, "" if none */
+    char *endstr;       /* string that closes the literal */
+    int step;           /* how far to advance on this pass */
+    int finished;       /* set once the closing string is seen */
+
+    /*
+    ** the opening string is part of the literal: step over it
+    ** and count it
+    */
+    step = strlen(W_litbegin(litptr));
+    _P_nextchr += step;
+    _P_stringsize = step;
+
+    /*
+    ** no separate end string means there is nothing more to collect
+    */
+    if (!*(W_litend(litptr)))
+    {
+        return(0);
+    }
+
+    /*
+    ** one pass for each character (or delimiter) in the literal
+    */
+    for (;;)
+    {
+        if (_P_needmore())
+        {
+            /*
+            ** out of characters, move on to next line
+            */
+            if (_P_nextline())
+            {
+                return(1);
+            }
+            /*
+            ** a line without content was a command; check
+            ** that this literal is still legit, since it
+            ** could have just been reset by the command
+            */
+            if (!_P_has_content && !W_is_lit(litptr))
+            {
+                return(0);
+            }
+        }
+
+        escstr = W_litescape(litptr);
+        endstr = W_litend(litptr);
+        finished = 0;
+
+        if ('\0' != *escstr
+            && !S_wordcmp(_P_nextchr,escstr)
+            && !S_wordcmp(_P_nextchr+strlen(escstr),endstr))
+        {
+            /*
+            ** an escaped end string: swallow both pieces
+            ** and keep going
+            */
+            step = strlen(escstr) + strlen(endstr);
+        }
+        else if (!S_wordcmp(_P_nextchr,endstr))
+        {
+            /*
+            ** the real end of the literal
+            */
+            step = strlen(endstr);
+            finished = 1;
+        }
+        else
+        {
+            /*
+            ** yet another character in the literal
+            */
+            step = 1;
+        }
+
+        _P_nextchr += step;
+        _P_stringsize += step;
+        if (finished)
+        {
+            return(0);
+        }
+    }
+
+#ifndef lint
+    Z_fatal("shouldn't execute this line at the end of _P_litsnarf");
+#endif
+} /* _P_litsnarf */
+
+/*
+** scan over the beginning-of-line comment that starts at _P_nextchr.
+** returns 1 if the input ran out before the comment was closed,
+** 0 otherwise.
+*/
+static int
+_P_bolsnarf(bolptr)
+W_bol bolptr;
+{
+    char *escstr;       /* escape string of the comment, "" if none */
+    char *endstr;       /* string that closes the comment */
+
+    /*
+    ** step over the string that opened the comment
+    */
+    _P_nextchr += strlen(W_bolbegin(bolptr));
+
+    /*
+    ** no separate end string means there is nothing more to skip
+    */
+    if (!*(W_bolend(bolptr)))
+    {
+        return(0);
+    }
+
+    /*
+    ** one pass for each character (or delimiter) in the comment
+    */
+    for (;;)
+    {
+        if (_P_needmore())
+        {
+            /*
+            ** out of characters, move on to next line
+            */
+            if (_P_nextline())
+            {
+                return(1);
+            }
+            /*
+            ** a line without content was a command; check
+            ** that this comment is still legit, since
+            ** comments could have just been reset by
+            ** the command
+            */
+            if (!_P_has_content && !W_is_bol(bolptr))
+            {
+                return(0);
+            }
+        }
+
+        escstr = W_bolescape(bolptr);
+        endstr = W_bolend(bolptr);
+
+        if ('\0' != *escstr
+            && !S_wordcmp(_P_nextchr,escstr)
+            && !S_wordcmp(_P_nextchr+strlen(escstr),endstr))
+        {
+            /*
+            ** an escaped end string: swallow both pieces
+            ** and keep going
+            */
+            _P_nextchr += strlen(escstr) + strlen(endstr);
+        }
+        else if (!S_wordcmp(_P_nextchr,endstr))
+        {
+            /*
+            ** the real end of the comment
+            */
+            _P_nextchr += strlen(endstr);
+            return(0);
+        }
+        else
+        {
+            /*
+            ** yet another character in the comment
+            */
+            _P_nextchr++;
+        }
+    }
+
+#ifndef lint
+    Z_fatal("shouldn't execute this line in at end of _P_bolsnarf");
+#endif
+} /* _P_bolsnarf */
+
+/*
+** pass over a comment -- look for nesting
+**
+** on entry _P_nextchr rests on the begin string of the comment.
+** for comment types that nest, every further begin string raises the
+** nesting depth and every end string lowers it; the comment is over
+** when the depth gets back to 0. for other comment types the first
+** unescaped end string closes the comment.
+** returns 1 if the input ran out before the comment was closed,
+** 0 otherwise.
+*/
+static int
+_P_comsnarf(comptr)
+W_com comptr;
+{
+    int depth = 1; /* nesting depth */
+
+    /*
+    ** skip the start of comment string
+    */
+    _P_nextchr += strlen(W_combegin(comptr));
+
+    /*
+    ** is there a separate end string
+    ** if not, then we're done
+    */
+    if ('\0' == *(W_comend(comptr)))
+    {
+        return(0);
+    }
+
+    /*
+    ** loop once for each character in the comment
+    */
+    while(1)
+    {
+        /*
+        ** if we are out of characters, move on to next line
+        */
+        if (_P_needmore())
+        {
+            if (_P_nextline())
+            {
+                return(1);
+            }
+            if (!_P_has_content)
+            {
+                /*
+                ** since we've just gotten a command
+                ** check to see if this comment
+                ** is still legit ... comments
+                ** could have just been reset
+                ** by the command
+                */
+                if (!W_is_com(comptr))
+                {
+                    return(0);
+                }
+            }
+        } /* if at end of line */
+
+        /*
+        ** see if we have an escaped end of comment string
+        */
+        if ('\0' != *(W_comescape(comptr)) && /* escape string exists */
+            !S_wordcmp(_P_nextchr,
+                       W_comescape(comptr)) && /* and escape matches */
+            !S_wordcmp(_P_nextchr+strlen(W_comescape(comptr)),
+                       W_comend(comptr))) /* and end string matches */
+        {
+            /*
+            ** skip over the escape sequence and the end sequence
+            */
+            _P_nextchr += strlen(W_comescape(comptr))
+                        + strlen(W_comend(comptr));
+            continue;
+        }
+
+        /*
+        ** see if we have an end of comment string
+        */
+        if (!S_wordcmp(_P_nextchr,W_comend(comptr))) /* end matches */
+        {
+            /*
+            ** skip over the end sequence
+            */
+            _P_nextchr += strlen(W_comend(comptr));
+            if (W_is_nesting(comptr))
+            {
+                depth--;
+                if (0 == depth)
+                    return(0);
+            }
+            else
+            {
+                return(0);
+            }
+            continue;
+        }
+
+        /*
+        ** see if we have another beginning of comment string.
+        ** this has to be compared against the BEGIN string: a
+        ** test against the end string can never succeed here
+        ** (end strings were handled just above), and the depth
+        ** would then never grow past 1.
+        */
+        if (W_is_nesting(comptr) &&
+            !S_wordcmp(_P_nextchr,W_combegin(comptr))) /* begin matches */
+        {
+            _P_nextchr += strlen(W_combegin(comptr));
+            depth++;
+            continue;
+        }
+
+        /*
+        ** this must be yet another character in the comment, so
+        ** just snarf it up
+        */
+        _P_nextchr++;
+    } /* while loop once for each character */
+
+#ifndef lint
+    Z_fatal("should not execute this line in _P_comsnarf\n");
+#endif
+
+} /* _P_comsnarf */
+
+
+/*
+** parse a file
+**
+** walks the lines selected by P_file_parse, handling one object per
+** pass through the main loop: whitespace (unless U_INCLUDE_WS is set)
+** and comments are skipped, and anything else is stored as a token --
+** a delimited literal, a floating point number, a run of digits, a
+** word, or a single character.
+** returns when the lines run out, when a literal or comment runs off
+** the end of the input, or when the token table is full.
+**
+** characters are converted to unsigned char before they are handed to
+** the <ctype.h> classifiers; passing a negative char value to them is
+** undefined.
+*/
+static void
+_P_do_parse()
+{
+
+    char *ptr; /* scratch space */
+    int tmp;
+    int ret_code; /* non-zero once a literal ran off the end of the input */
+
+    K_token newtoken;
+    W_bol bolptr;
+    W_com comptr;
+    W_lit litptr;
+
+    int startline, endline, startpos;
+
+    /*
+    ** main parsing loop
+    */
+    while (1)
+    {
+        /*
+        ** get more text if necessary
+        */
+        if (_P_needmore())
+        {
+            if (_P_nextline())
+            {
+                return;
+            }
+
+            /*
+            ** if the line contains nothing of interest,
+            ** try again
+            */
+            if (!_P_has_content)
+            {
+                continue;
+            }
+
+            /*
+            ** check to see if this line starts a comment
+            */
+            if ((bolptr = W_isbol(_P_firstchr)) != W_BOLNULL)
+            {
+                if (_P_bolsnarf(bolptr))
+                {
+                    return;
+                }
+                continue;
+            }
+        } /* if _P_needmore */
+
+        /*
+        ** skip whitespace
+        */
+        if (!(U_INCLUDE_WS & _P_flags) &&
+            isspace((unsigned char) *_P_nextchr))
+        {
+            _P_nextchr++;
+            continue;
+        }
+
+        /*
+        ** check to see if this character starts a comment
+        */
+        if ((comptr = W_iscom(_P_nextchr)) != W_COMNULL)
+        {
+            if (_P_comsnarf(comptr))
+            {
+                return;
+            }
+            continue;
+        }
+
+        /*
+        ** if there aren't any tokens on this line already
+        ** set up the index from the token line to the content line
+        */
+        if (!L_getcount(_P_fnumb,L_gettlmax(_P_fnumb)))
+        {
+            L_settlindex(_P_fnumb,
+                         L_gettlmax(_P_fnumb),
+                         L_getclmax(_P_fnumb));
+            /*
+            ** and the pointer from the token line to the
+            ** first token on the line
+            */
+            L_setindex(_P_fnumb,
+                       L_gettlmax(_P_fnumb),
+                       K_gettmax(_P_fnumb));
+        }
+
+        startline = L_tl2cl(_P_fnumb,L_gettlmax(_P_fnumb));
+        startpos = _P_nextchr-_P_firstchr;
+
+        newtoken = K_maketoken();
+        K_setline(newtoken,L_gettlmax(_P_fnumb));
+        K_setpos(newtoken,startpos);
+
+        ret_code = 0;
+        /*
+        ** check to see if this character starts a
+        ** delimited literal string
+        */
+        if ((litptr = W_islit(_P_nextchr)) != W_LITNULL)
+        {
+            ret_code = _P_litsnarf(litptr);
+            K_settype(newtoken,K_LIT);
+            S_allocstr(&ptr,_P_stringsize);
+            /*
+            ** a literal that ends on the line it started on
+            ** must not drag the rest of that line along,
+            ** hence the two separate copy strategies below
+            */
+            endline = L_getclmax(_P_fnumb);
+            if (endline > startline)
+            {
+                /*
+                ** copy in the first line of the literal
+                */
+                (void) strcpy(ptr,
+                              L_getcline(_P_fnumb,startline)
+                              +startpos);
+                /*
+                ** now copy all the lines between
+                ** the first and last
+                */
+                for (tmp=startline+1;tmp<endline;tmp++)
+                {
+                    (void) strcat(ptr,
+                                  L_getcline(_P_fnumb,tmp));
+                }
+                /*
+                ** and now copy in as much of the last line
+                ** as is still missing
+                */
+                (void) strncat(ptr,
+                               L_getcline(_P_fnumb,endline),
+                               _P_stringsize-strlen(ptr));
+            }
+            else
+            {
+                (void) strncpy(ptr,
+                               L_getcline(_P_fnumb,startline)
+                               +startpos,
+                               _P_stringsize);
+                /*
+                ** terminate the string you just copied
+                ** (strncpy does not do so when it stops
+                ** at the count)
+                */
+                ptr[_P_stringsize] = '\0';
+            }
+            K_settext(newtoken,ptr);
+        } /* if is_lit */
+
+        /*
+        ** see if this is a floating point number
+        ** (tmp receives its length in characters)
+        */
+        else if ((tmp = F_isfloat(_P_nextchr,
+                                  _P_flags & U_NEED_DECIMAL,
+                                  _P_flags & U_INC_SIGN)) != 0)
+        {
+            K_saventext(newtoken,_P_nextchr,tmp);
+            K_settype(newtoken,K_FLO_NUM);
+            if (!(_P_flags & U_BYTE_COMPARE))
+            {
+                K_setfloat(newtoken,
+                           F_atof(K_gettext(newtoken),
+                                  USE_ALL));
+
+                /*
+                ** assign the current tolerance
+                */
+                K_settol(newtoken,T_gettol(_P_next_tol));
+            }
+
+            /*
+            ** use next tolerance in the
+            ** specification if there is one
+            */
+            if (T_moretols(_P_next_tol))
+            {
+                _P_next_tol++;
+            }
+            /*
+            ** and move pointer past the float
+            */
+            _P_nextchr += tmp;
+        }
+
+        /*
+        ** is this a fixed point number
+        */
+        else if (isdigit((unsigned char) *_P_nextchr))
+        {
+            for(ptr=_P_nextchr; isdigit((unsigned char) *ptr); ptr++)
+            {
+            }
+            K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr);
+            K_settype(newtoken,K_LIT);
+            _P_nextchr = ptr;
+        }
+
+        /*
+        ** try an alpha-numeric word
+        */
+        else if (isalpha((unsigned char) *_P_nextchr)
+                 || _P_in_alpha(*_P_nextchr))
+        {
+            /*
+            ** it's a multi character word
+            */
+            for(ptr = _P_nextchr;
+                isalpha((unsigned char) *ptr)
+                || isdigit((unsigned char) *ptr)
+                || _P_in_alpha(*ptr);
+                ptr++)
+            {
+            }
+            K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr);
+            K_settype(newtoken,K_LIT);
+            _P_nextchr = ptr;
+        }
+        else
+        {
+            /*
+            ** otherwise, treat the char itself as a token
+            */
+            K_saventext(newtoken,_P_nextchr,1);
+            K_settype(newtoken,K_LIT);
+            _P_nextchr++;
+        }
+
+        K_settoken(_P_fnumb,K_gettmax(_P_fnumb),newtoken);
+        L_inccount(_P_fnumb,L_gettlmax(_P_fnumb));
+        /*
+        ** if we are out of space, complain and quit
+        */
+        if (K_inctmax(_P_fnumb))
+        {
+            (void) sprintf(Z_err_buf,
+     "warning -- to many tokens in file only first %d tokens will be used.\n",
+                           K_MAXTOKENS);
+            Z_complain(Z_err_buf);
+            return;
+        }
+#ifndef NOCHATTER
+        if (0 == (K_gettmax(_P_fnumb) % _P_PARSE_CHATTER))
+        {
+            int max = K_gettmax(_P_fnumb);
+            (void) sprintf(Z_err_buf,
+                           "scanned %d words from file #%d\n",
+                           max,_P_fnumb+1);
+            Z_chatter(Z_err_buf);
+        }
+#endif
+
+        /*
+        ** are we done? (the literal just stored ran into
+        ** the end of the input)
+        */
+        if(ret_code)
+        {
+            return;
+        }
+    } /* loop once per object on a line */
+
+#ifndef lint
+    Z_fatal("this line should never execute");
+#endif
+}
+
+/*
+** parse lcnt lines of file number num, beginning at real line strt,
+** under the parse mode selected by flags. the parser is put back in
+** its default state before the scan starts.
+*/
+void
+P_file_parse(num,strt,lcnt,flags)
+int num; /* file number */
+int strt; /* first line to parse expressed in real line numbers */
+int lcnt; /* max number of lines to parse */
+int flags; /* flags for controlling the parse mode */
+{
+    /*
+    ** record the request in the module-wide state variables
+    */
+    _P_flags = flags;
+    _P_lcount = lcnt;
+    _P_start = strt;
+    _P_fnumb = num;
+
+    _P_initparser();
+
+    /*
+    ** no line is loaded yet: rest the scan pointer on the empty
+    ** dummy line so that the first _P_needmore() asks for one
+    */
+    _P_nextchr = _P_dummyline;
+    _P_next_tol = 0;
+    _P_has_content = 0;
+    L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0);
+
+    /*
+    ** start everything back one line (it will be incremented
+    ** just before the first line is accessed)
+    */
+    _P_realline = _P_start-1;
+
+    _P_do_parse();
+
+    if (!_P_has_content)
+    {
+        return;
+    }
+
+    /*
+    ** the last line still counts as having content, so parsing
+    ** stopped in the middle of a line. this case has not been
+    ** looked at carefully, so there is a good chance that it is
+    ** buggy. complain, then count the unfinished line anyway.
+    */
+    (void) sprintf(Z_err_buf,"parser got confused at end of file\n");
+    Z_complain(Z_err_buf);
+    L_incclmax(_P_fnumb);
+    if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb)))
+    {
+        L_inctlmax(_P_fnumb);
+    }
+}