diff options
Diffstat (limited to 'utils/Spiff/parse.c')
-rw-r--r-- | utils/Spiff/parse.c | 802 |
1 files changed, 802 insertions, 0 deletions
diff --git a/utils/Spiff/parse.c b/utils/Spiff/parse.c new file mode 100644 index 0000000000..55722b701d --- /dev/null +++ b/utils/Spiff/parse.c @@ -0,0 +1,802 @@ +/* Copyright (c) 1988 Bellcore +** All Rights Reserved +** Permission is granted to copy or use this program, EXCEPT that it +** may not be sold for profit, the copyright notice must be reproduced +** on copies, and credit should be given to Bellcore where it is due. +** BELLCORE MAKES NO WARRANTY AND ACCEPTS NO LIABILITY FOR THIS PROGRAM. +*/ + + +#ifndef lint +static char rcsid[]= "$Header$"; +#endif + +#include "misc.h" +#include "flagdefs.h" +#include "float.h" +#include "tol.h" +#include "token.h" +#include "line.h" +#include "command.h" +#include "comment.h" +#include "parse.h" + + +#include <ctype.h> + +#define _P_PARSE_CHATTER 1000 + + +static int _P_realline; /* loop counter */ +static int _P_fnumb; + +static char *_P_nextchr; /* pointer to the next character to parse */ +static char *_P_firstchr; /* pointer to the beginning of the line being parsed */ +static int _P_next_tol; /* number of floats seen on this line */ +static int _P_stringsize; /* count of number of characters that are being + read into a comment or literal */ +static int _P_has_content; /* flag to indicate if the line being + parsed has any tokens on it */ +static int _P_start; /* first line to parse */ +static int _P_lcount; /* number of lines to parse */ + +static int _P_flags; /* location for global flags */ + +/* +** by default, "words" can be made up of numbers and letters +** the following code allows for extending the alphabet that can +** be used in words. this is useful for handling languages such +** as C where the underscore character is an allowable character +** in an identifier. If a character (such as underscore) is NOT added +** to the alphabet, the identifier will be broken into 2 or more "words" +** by the parser. as such the two sequences +** one_two +** and +** one _ two +** would look identical to spiff. +*/ +#define _P_ALPHALEN 256 +static char _P_alpha[_P_ALPHALEN]; + +static void +_P_alpha_clear() +{ + *_P_alpha = '\0'; +} + +static +_P_in_alpha(chr) +char chr; +{ +#ifndef ATT + extern int index(); +#endif + /* + ** special case when string terminator + ** is handed to us + */ + if ('\0' == chr) + return(0); + +#ifdef ATT + return((int) strchr(_P_alpha,chr)); +#else + return((int) index(_P_alpha,chr)); +#endif +} + +void +P_addalpha(ptr) +char *ptr; +{ + char buf[Z_LINELEN]; + + S_wordcpy(buf,ptr); /* copy up to (but not including) + the first whitespace char */ + + if ((strlen(_P_alpha) + strlen(buf)) >= _P_ALPHALEN) + { + Z_fatal("too many characters added to extended alphabet"); + } + (void) strcat(_P_alpha,buf); +} + +/* +** put parser in a default state +*/ + +static char _P_dummyline[2]; /* a place to aim wild pointers */ +static void +_P_initparser() +{ + _P_dummyline[0] = '\0'; + + /* + ** now reset all the state of each module + */ + C_clear_cmd(); /* disable embedded command key word */ + T_clear_tols(); + W_clearcoms(); + W_clearlits(); + _P_alpha_clear(); /* disable extended alphabet */ + + /* + ** and set state as defined by execute-time commands. + */ + C_docmds(); + return; +} + + +static +_P_needmore() +{ + return(*_P_nextchr == '\0'); +} + +static +_P_nextline() +{ + /* + ** if the line that we just finished had + ** some content, increment the count + */ + if (_P_has_content) + { + L_incclmax(_P_fnumb); + /* + ** if the previous line had a token + ** increment the line + */ + if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb))) + { + L_inctlmax(_P_fnumb); + L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0); + } + _P_has_content = 0; + } + + /* + ** reset the number of floats seen on the line + */ + _P_next_tol = 0; + + /* + ** get another line if there is one available + */ + _P_realline++; + if (_P_realline >= _P_start+_P_lcount) + { + return(1); + } + + _P_firstchr = _P_nextchr = L_getrline(_P_fnumb,_P_realline); + /* + ** and look for a command + */ + if (C_is_cmd(_P_firstchr)) + { + _P_nextchr = _P_dummyline; + _P_has_content = 0; + } + else + { + /* + ** we have a real line, so set up the index + */ + L_setclindex(_P_fnumb,L_getclmax(_P_fnumb),_P_realline); + _P_has_content = 1; + } + return(0); +} + +/* +** the following three routines (_P_litsnarf, _P_bolsnarf, and _P_comsnarf +** all do roughly the same thing. they scan ahead and collect the +** specified string, move _P_nextchr to the end of the +** comment or literal and return 1 if we run off the end of file, +** 0 otherwise. it would have been nice to have 1 routine handle +** all three task (there is much common code), however there were +** so enough differences, (for instance, only comments check for nesting, +** only literals need to set _P_stringsize, etc) +** that I decided to split them up. +*/ +static int +_P_litsnarf(litptr) +W_lit litptr; +{ + _P_stringsize = 0; + /* + ** skip the start of literal string + */ + _P_nextchr += strlen(W_litbegin(litptr)); + _P_stringsize += strlen(W_litbegin(litptr)); + /* + ** is there a separate end string? + ** if not, then we're done + */ + if ('\0' == *(W_litend(litptr))) + { + return(0); + } + /* + ** loop once for each character in the literal + */ + while(1) + { + /* + ** if we are out of characters, move on to next line + */ + if (_P_needmore()) + { + if (_P_nextline()) + { + return(1); + } + if (!_P_has_content) + { + /* + ** since we've just gotten a command + ** check to see if this literal + ** is still legit ... + ** could have just been reset + ** by the command + */ + if (!W_is_lit(litptr)) + { + return(0); + } + } + } /* if _P_needmore */ + + /* + ** see if we have an escaped end of literal string + */ + if (('\0' != *(W_litescape(litptr))) && /* escape string exists */ + !S_wordcmp(_P_nextchr, + W_litescape(litptr)) && /* and escape matches */ + !S_wordcmp(_P_nextchr+strlen(W_litescape(litptr)), + W_litend(litptr))) /* and endstring matches */ + { + _P_nextchr += strlen(W_litescape(litptr)) + + strlen(W_litend(litptr)); + _P_stringsize += strlen(W_litescape(litptr)) + + strlen(W_litend(litptr)); + continue; + } + + /* + ** see if we have an end of literal string + */ + if (!S_wordcmp(_P_nextchr,W_litend(litptr))) /* escape matches */ + { + _P_nextchr += strlen(W_litend(litptr)); + _P_stringsize += strlen(W_litend(litptr)); + return(0); + } + /* + ** this must be yet another character in the literal, so + ** just snarf it up + */ + _P_nextchr++; + _P_stringsize++; + } /* while loop once for each character */ + +#ifndef lint + Z_fatal("shouldn't execute this line at the end of _P_litsnarf"); +#endif +} /* _P_litsnarf */ + +static int +_P_bolsnarf(bolptr) +W_bol bolptr; +{ + /* + ** skip the start of comment string + */ + _P_nextchr += strlen(W_bolbegin(bolptr)); + /* + ** is there a separate end string + ** if not, then we're done + */ + if ('\0' == *(W_bolend(bolptr))) + { + return(0); + } + /* + ** loop once for each character in the comment + */ + while(1) + { + /* + ** if we are out of characters,move on to next line + */ + if (_P_needmore()) + { + if (_P_nextline()) + { + return(1); + } + if (!_P_has_content) + { + /* + ** since we've just gotten a command + ** check to see if this comment + ** is still legit ... comments + ** could have just been reset + ** by the command + */ + if (!W_is_bol(bolptr)) + { + return(0); + } + } + } /* if at end of line */ + + /* + ** see if we have an escaped end of comment string + */ + if ('\0' != *(W_bolescape(bolptr)) && /* escape string exists */ + !S_wordcmp(_P_nextchr, + W_bolescape(bolptr)) && /* and escape matches */ + !S_wordcmp(_P_nextchr+strlen(W_bolescape(bolptr)), + W_bolend(bolptr))) /* and end string matches */ + { + _P_nextchr += strlen(W_bolescape(bolptr)) + + strlen(W_bolend(bolptr)); + continue; + } + + /* + ** see if we have an end of comment string + */ + if (!S_wordcmp(_P_nextchr,W_bolend(bolptr))) + { + _P_nextchr += strlen(W_bolend(bolptr)); + return(0); + } + /* + ** this must be yet another character in the comment, so + ** just snarf it up + */ + _P_nextchr++; + } /* while loop once for each character */ + +#ifndef lint + Z_fatal("shouldn't execute this line in at end of _P_bolsnarf"); +#endif +} /* _P_bolsnarf */ + +/* +** pass over a comment -- look for nexting +*/ +static +_P_comsnarf(comptr) +W_com comptr; +{ + int depth = 1; /* nesting depth */ + /* + ** skip the start of comment string + */ + _P_nextchr += strlen(W_combegin(comptr)); + + /* + ** is there a separate end string + ** if not, then we're done + */ + if ('\0' == *(W_comend(comptr))) + { + return(0); + } + /* + ** loop once for each character in the comment + */ + while(1) + { + /* + ** if we are out of characters, move on to next line + */ + if (_P_needmore()) + { + if (_P_nextline()) + { + return(1); + } + if (!_P_has_content) + { + /* + ** since we've just gotten a command + ** check to see if this comment + ** is still legit ... comments + ** could have just been reset + ** by the command + */ + if (!W_is_com(comptr)) + { + return(0); + } + } + } /* if at end of line */ + + /* + ** see if we have an escaped end of comment string + */ + if ('\0' != *(W_comescape(comptr)) && /* escape string exists */ + !S_wordcmp(_P_nextchr, + W_comescape(comptr)) && /* and escape matches */ + !S_wordcmp(_P_nextchr+strlen(W_comescape(comptr)), + W_comend(comptr))) /* and end string matches */ + { + /* + ** skip over the escape sequence and the end sequence + */ + _P_nextchr += strlen(W_comescape(comptr)) + + strlen(W_comend(comptr)); + continue; + } + + /* + ** see if we have an end of comment string + */ + if (!S_wordcmp(_P_nextchr,W_comend(comptr))) /* end matches */ + { + /* + ** skip over the end sequence + */ + _P_nextchr += strlen(W_comend(comptr)); + if (W_is_nesting(comptr)) + { + depth--; + if (0 == depth) + return(0); + } + else + { + return(0); + } + continue; + } + /* + ** see if we have another beginning of comment string + */ + if (W_is_nesting(comptr) && + !S_wordcmp(_P_nextchr,W_comend(comptr))) /* end matches */ + { + _P_nextchr += strlen(W_comend(comptr)); + depth++; + continue; + } + /* + ** this must be yet another character in the comment, so + ** just snarf it up + */ + _P_nextchr++; + } /* while loop once for each character */ + +#ifndef lint + Z_fatal("should not execute this line in _P_comsnarf\n"); +#endif + +} /* _P_comsnarf */ + + +/* +** parse a file +*/ +static void +_P_do_parse() +{ + + char *ptr; /* scratch space */ + int tmp; + int ret_code; + + K_token newtoken; + W_bol bolptr; + W_com comptr; + W_lit litptr; + + int startline, endline, startpos; + + /* + ** main parsing loop + */ + while (1) + { + /* + ** get more text if necessary + */ + if (_P_needmore()) + { + if (_P_nextline()) + { + return; + } + + /* + ** if the line contains nothing of interest, + ** try again + */ + if (!_P_has_content) + { + continue; + } + + /* + ** check to see if this line starts a comment + */ + if ((bolptr = W_isbol(_P_firstchr)) != W_BOLNULL) + { + if (_P_bolsnarf(bolptr)) + { + return; + } + continue; + } + } /* if _P_needmore */ + + /* + ** skip whitespace + */ + if (!(U_INCLUDE_WS & _P_flags) && isspace(*_P_nextchr)) + { + _P_nextchr++; + continue; + } + + /* + ** check to see if this character starts a comment + */ + if ((comptr = W_iscom(_P_nextchr)) != W_COMNULL) + { + if (_P_comsnarf(comptr)) + { + return; + } + continue; + } + + /* + ** if there aren't any tokens on this line already + ** set up the index from the token line to the content line + */ + if (!L_getcount(_P_fnumb,L_gettlmax(_P_fnumb))) + { + L_settlindex(_P_fnumb, + L_gettlmax(_P_fnumb), + L_getclmax(_P_fnumb)); + /* + ** and the pointer from the token line to the + ** first token on the line + */ + L_setindex(_P_fnumb, + L_gettlmax(_P_fnumb), + K_gettmax(_P_fnumb)); + } + + startline = L_tl2cl(_P_fnumb,L_gettlmax(_P_fnumb)); + startpos = _P_nextchr-_P_firstchr; + + newtoken = K_maketoken(); + K_setline(newtoken,L_gettlmax(_P_fnumb)); + K_setpos(newtoken,startpos); + + ret_code = 0; + /* + ** check to see if this character starts a + ** delimited literal string + */ + if ((litptr = W_islit(_P_nextchr)) != W_LITNULL) + { + ret_code = _P_litsnarf(litptr); + K_settype(newtoken,K_LIT); + S_allocstr(&ptr,_P_stringsize); + /* + ** fixed nasty memory bug here by adding else + ** old code copied entire line even if literal + ** ended before the end of line + ** should check into getting strcpy loaded + ** locally + */ + endline = L_getclmax(_P_fnumb); + if (endline > startline) + { + /* + ** copy in the first line of the literal + */ + (void) strcpy(ptr, + L_getcline(_P_fnumb,startline) + +startpos); + /* + ** now copy all the lines between + ** the first and last + */ + for (tmp=startline+1;tmp<endline;tmp++) + { + (void) strcat(ptr, + L_getcline(_P_fnumb,tmp)); + } + /* + ** and now copy in the last line + */ + (void) strncat(ptr, + L_getcline(_P_fnumb,endline), + _P_stringsize-strlen(ptr)); + } + else + { + (void) strncpy(ptr, + L_getcline(_P_fnumb,startline) + +startpos, + _P_stringsize); + /* + ** terminate the string you just copied + */ + ptr[_P_stringsize] = '\0'; + } + K_settext(newtoken,ptr); + } /* if is_lit */ + + /* + ** see if this is a floating point number + */ + else if (tmp = F_isfloat(_P_nextchr, + _P_flags & U_NEED_DECIMAL, + _P_flags & U_INC_SIGN)) + { + K_saventext(newtoken,_P_nextchr,tmp); + K_settype(newtoken,K_FLO_NUM); + if (!(_P_flags & U_BYTE_COMPARE)) + { + K_setfloat(newtoken, + F_atof(K_gettext(newtoken), + USE_ALL)); + + /* + ** assign the curent tolerance + */ + K_settol(newtoken,T_gettol(_P_next_tol)); + } + + /* + ** use next tolerance in the + ** specification if there is one + */ + if (T_moretols(_P_next_tol)) + { + _P_next_tol++; + } + /* + ** and move pointer past the float + */ + _P_nextchr += tmp; + } + + /* + ** is this a fixed point number + */ + else if (isdigit(*_P_nextchr)) + { + for(ptr=_P_nextchr; isdigit(*ptr); ptr++) + { + } + K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr); + K_settype(newtoken,K_LIT); + _P_nextchr = ptr; + } + + /* + ** try an alpha-numeric word + */ + else if (isalpha(*_P_nextchr) || _P_in_alpha(*_P_nextchr)) + { + /* + ** it's a multi character word + */ + for(ptr = _P_nextchr; + isalpha(*ptr) + || isdigit(*ptr) + || _P_in_alpha(*ptr); + ptr++) + { + } + K_saventext(newtoken,_P_nextchr,ptr-_P_nextchr); + K_settype(newtoken,K_LIT); + _P_nextchr = ptr; + } + else + { + /* + ** otherwise, treat the char itself as a token + */ + K_saventext(newtoken,_P_nextchr,1); + K_settype(newtoken,K_LIT); + _P_nextchr++; + } + + K_settoken(_P_fnumb,K_gettmax(_P_fnumb),newtoken); + L_inccount(_P_fnumb,L_gettlmax(_P_fnumb)); + /* + ** if we are out of space, complain and quit + */ + if (K_inctmax(_P_fnumb)) + { + (void) sprintf(Z_err_buf, + "warning -- to many tokens in file only first %d tokens will be used.\n", + K_MAXTOKENS); + Z_complain(Z_err_buf); + return; + } +#ifndef NOCHATTER + if (0 == (K_gettmax(_P_fnumb) % _P_PARSE_CHATTER)) + { + int max = K_gettmax(_P_fnumb); + (void) sprintf(Z_err_buf, + "scanned %d words from file #%d\n", + max,_P_fnumb+1); + Z_chatter(Z_err_buf); + } +#endif + + /* + ** are we done? + */ + if(ret_code) + { + return; + } + } /* loop once per object on a line */ + +#ifndef lint + Z_fatal("this line should never execute"); +#endif +} + +void +P_file_parse(num,strt,lcnt,flags) +int num; /* file number */ +int strt; /* first line to parse expressed in real line numbers */ +int lcnt; /* max number of lines to parse */ +int flags; /* flags for controlling the parse mode */ +{ + /* + ** set module-wide state variables + */ + _P_fnumb = num; + _P_start = strt; + _P_lcount = lcnt; + _P_flags = flags; + + _P_initparser(); + + _P_nextchr = _P_dummyline; + + _P_has_content = 0; + _P_next_tol = 0; + L_setcount(_P_fnumb,L_gettlmax(_P_fnumb),0); + /* + ** start everything back one line (it will be incremented + ** just before the first line is accessed + */ + _P_realline = _P_start-1; + + _P_do_parse(); + + /* + ** if the last line had content, increment the count + */ + if (_P_has_content) + { +/* +** this code will get executed if we stopped parsing in the middle +** of a line. i haven't looked at this case carefully. +** so, there is a good chance that it is buggy. +*/ +(void) sprintf(Z_err_buf,"parser got confused at end of file\n"); +Z_complain(Z_err_buf); + L_incclmax(_P_fnumb); + if (L_getcount(_P_fnumb,L_gettlmax(_P_fnumb))) + L_inctlmax(_P_fnumb); + } + return; +} |