aboutsummaryrefslogtreecommitdiff
path: root/regex
diff options
context:
space:
mode:
Diffstat (limited to 'regex')
-rw-r--r--regex/COPYRIGHT20
-rw-r--r--regex/Makefile130
-rw-r--r--regex/README32
-rw-r--r--regex/WHATSNEW108
-rw-r--r--regex/cclass.h27
-rw-r--r--regex/cname.h102
-rw-r--r--regex/debug.c242
-rw-r--r--regex/debug.ih14
-rw-r--r--regex/engine.c1019
-rw-r--r--regex/engine.ih35
-rw-r--r--regex/main.c510
-rw-r--r--regex/main.ih19
-rwxr-xr-xregex/mkh76
-rw-r--r--regex/regcomp.c1603
-rw-r--r--regex/regcomp.ih51
-rw-r--r--regex/regerror.c126
-rw-r--r--regex/regerror.ih12
-rw-r--r--regex/regex.3509
-rw-r--r--regex/regex.7235
-rw-r--r--regex/regex.h74
-rw-r--r--regex/regex2.h134
-rw-r--r--regex/regexec.c138
-rw-r--r--regex/regfree.c37
-rw-r--r--regex/split.c316
-rw-r--r--regex/tests477
-rw-r--r--regex/utils.h22
26 files changed, 6068 insertions, 0 deletions
diff --git a/regex/COPYRIGHT b/regex/COPYRIGHT
new file mode 100644
index 0000000..30c1f7a
--- /dev/null
+++ b/regex/COPYRIGHT
@@ -0,0 +1,20 @@
+Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
+This software is not subject to any license of the American Telephone
+and Telegraph Company or of the Regents of the University of California.
+
+Permission is granted to anyone to use this software for any purpose on
+any computer system, and to alter it and redistribute it, subject
+to the following restrictions:
+
+1. The author is not responsible for the consequences of use of this
+ software, no matter how awful, even if they arise from flaws in it.
+
+2. The origin of this software must not be misrepresented, either by
+ explicit claim or by omission. Since few users ever read sources,
+ credits must appear in the documentation.
+
+3. Altered versions must be plainly marked as such, and must not be
+ misrepresented as being the original software. Since few users
+ ever read sources, credits must appear in the documentation.
+
+4. This notice may not be removed or altered.
diff --git a/regex/Makefile b/regex/Makefile
new file mode 100644
index 0000000..3882b37
--- /dev/null
+++ b/regex/Makefile
@@ -0,0 +1,130 @@
+# You probably want to take -DREDEBUG out of CFLAGS, and put something like
+# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
+# internal assertion checking and some debugging facilities).
+# Put -Dconst= in for a pre-ANSI compiler.
+# Do not take -DPOSIX_MISTAKE out.
+# REGCFLAGS isn't important to you (it's for my use in some special contexts).
+CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
+
+# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want
+# the Berkeley __P macro, put -b in.
+MKHFLAGS=
+
+# Flags for linking but not compiling, if any.
+LDFLAGS=
+
+# Extra libraries for linking, if any.
+LIBS=
+
+# Internal stuff, should not need changing.
+OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
+OBJS=$(OBJPRODN) split.o debug.o main.o
+H=cclass.h cname.h regex2.h utils.h
+REGSRC=regcomp.c regerror.c regexec.c regfree.c
+ALLSRC=$(REGSRC) engine.c debug.c main.c split.c
+
+# Stuff that matters only if you're trying to lint the package.
+LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
+LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c main.c
+JUNKLINT=possible pointer alignment|null effect
+
+# arrangements to build forward-reference header files
+.SUFFIXES: .ih .h
+.c.ih:
+ sh ./mkh $(MKHFLAGS) -p $< >$@
+
+default: r
+
+lib: purge $(OBJPRODN)
+ rm -f libregex.a
+ ar crv libregex.a $(OBJPRODN)
+
+purge:
+ rm -f *.o
+
+# stuff to build regex.h
+REGEXH=regex.h
+REGEXHSRC=regex2.h $(REGSRC)
+$(REGEXH): $(REGEXHSRC) mkh
+ sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
+ cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
+ rm -f regex.tmp
+
+# dependencies
+$(OBJPRODN) debug.o: utils.h regex.h regex2.h
+regcomp.o: cclass.h cname.h regcomp.ih
+regexec.o: engine.c engine.ih
+regerror.o: regerror.ih
+debug.o: debug.ih
+main.o: main.ih
+
+# tester
+re: $(OBJS)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
+
+# regression test
+r: re tests
+ ./re <tests
+ ./re -el <tests
+ ./re -er <tests
+
+# 57 variants, and other stuff, for development use -- not useful to you
+ra: ./re tests
+ -./re <tests
+ -./re -el <tests
+ -./re -er <tests
+
+rx: ./re tests
+ ./re -x <tests
+ ./re -x -el <tests
+ ./re -x -er <tests
+
+t: ./re tests
+ -time ./re <tests
+ -time ./re -cs <tests
+ -time ./re -el <tests
+ -time ./re -cs -el <tests
+
+l: $(LINTC)
+ lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
+
+fullprint:
+ ti README WHATSNEW notes todo | list
+ ti *.h | list
+ list *.c
+ list regex.3 regex.7
+
+print:
+ ti README WHATSNEW notes todo | list
+ ti *.h | list
+ list reg*.c engine.c
+
+
+mf.tmp: Makefile
+ sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
+
+DTRH=cclass.h cname.h regex2.h utils.h
+PRE=COPYRIGHT README WHATSNEW
+POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
+FILES=$(PRE) Makefile $(POST)
+DTR=$(PRE) Makefile=mf.tmp $(POST)
+dtr: $(FILES) mf.tmp
+ makedtr $(DTR) >$@
+ rm mf.tmp
+
+cio: $(FILES)
+ cio $(FILES)
+
+rdf: $(FILES)
+ rcsdiff -c $(FILES) 2>&1 | p
+
+# various forms of cleanup
+tidy:
+ rm -f junk* core core.* *.core dtr *.tmp lint
+
+clean: tidy
+ rm -f *.o *.s *.ih re libregex.a
+
+# don't do this one unless you know what you're doing
+spotless: clean
+ rm -f mkh regex.h
diff --git a/regex/README b/regex/README
new file mode 100644
index 0000000..e6ce373
--- /dev/null
+++ b/regex/README
@@ -0,0 +1,32 @@
+alpha3.8 release.
+Tue Aug 10 15:51:48 EDT 1999
+henry@spsystems.net (formerly henry@zoo.toronto.edu)
+
+See WHATSNEW for change listing.
+
+installation notes:
+--------
+Read the comments at the beginning of Makefile before running.
+
+Utils.h contains some things that just might have to be modified on
+some systems, as well as a nested include (ugh) of <assert.h>.
+
+The "fake" directory contains quick-and-dirty fakes for some header
+files and routines that old systems may not have. Note also that
+-DUSEBCOPY will make utils.h substitute bcopy() for memmove().
+
+After that, "make r" will build regcomp.o, regexec.o, regfree.o,
+and regerror.o (the actual routines), bundle them together into a test
+program, and run regression tests on them. No output is good output.
+
+"make lib" builds just the .o files for the actual routines (when
+you're happy with testing and have adjusted CFLAGS for production),
+and puts them together into libregex.a. You can pick up either the
+library or *.o ("make lib" makes sure there are no other .o files left
+around to confuse things).
+
+Main.c, debug.c, split.c are used for regression testing but are not part
+of the RE routines themselves.
+
+Regex.h goes in /usr/include. All other .h files are internal only.
+--------
diff --git a/regex/WHATSNEW b/regex/WHATSNEW
new file mode 100644
index 0000000..1295343
--- /dev/null
+++ b/regex/WHATSNEW
@@ -0,0 +1,108 @@
+New in alpha3.8: Bug fix for signed/unsigned mixup, found and fixed
+by the FreeBSD folks.
+
+New in alpha3.7: A bit of cleanup aimed at maximizing portability,
+possibly at slight cost in efficiency. "ul" suffixes and "unsigned long"
+no longer appear, in particular.
+
+New in alpha3.6: A couple more portability glitches fixed.
+
+New in alpha3.5: Active development of this code has been stopped --
+I'm working on a complete reimplementation -- but folks have found some
+minor portability glitches and the like, hence this release to fix them.
+One penalty: slightly reduced compatibility with old compilers, because
+the ANSI C `unsigned long' type and `ul' constant suffix are used in a
+few places (I could avoid this but it would be considerably more work).
+
+New in alpha3.4: The complex bug alluded to below has been fixed (in a
+slightly kludgey temporary way that may hurt efficiency a bit; this is
+another "get it out the door for 4.4" release). The tests at the end of
+the tests file have accordingly been uncommented. The primary sign of
+the bug was that something like a?b matching ab matched b rather than ab.
+(The bug was essentially specific to this exact situation, else it would
+have shown up earlier.)
+
+New in alpha3.3: The definition of word boundaries has been altered
+slightly, to more closely match the usual programming notion that "_"
+is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir,
+and the makefile no longer alludes to it in mysterious ways. The
+makefile has generally been cleaned up some. Fixes have been made
+(again!) so that the regression test will run without -DREDEBUG, at
+the cost of weaker checking. A workaround for a bug in some folks'
+<assert.h> has been added. And some more things have been added to
+tests, including a couple right at the end which are commented out
+because the code currently flunks them (complex bug; fix coming).
+Plus the usual minor cleanup.
+
+New in alpha3.2: Assorted bits of cleanup and portability improvement
+(the development base is now a BSDI system using GCC instead of an ancient
+Sun system, and the newer compiler exposed some glitches). Fix for a
+serious bug that affected REs using many [] (including REG_ICASE REs
+because of the way they are implemented), *sometimes*, depending on
+memory-allocation patterns. The header-file prototypes no longer name
+the parameters, avoiding possible name conflicts. The possibility that
+some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
+now handled gracefully. "uchar" is no longer used as an internal type
+name (too many people have the same idea). Still the same old lousy
+performance, alas.
+
+New in alpha3.1: Basically nothing, this release is just a bookkeeping
+convenience. Stay tuned.
+
+New in alpha3.0: Performance is no better, alas, but some fixes have been
+made and some functionality has been added. (This is basically the "get
+it out the door in time for 4.4" release.) One bug fix: regfree() didn't
+free the main internal structure (how embarrassing). It is now possible
+to put NULs in either the RE or the target string, using (resp.) a new
+REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to
+regcomp() makes all characters ordinary, so you can match a literal
+string easily (this will become more useful when performance improves!).
+There are now primitives to match beginnings and ends of words, although
+the syntax is disgusting and so is the implementation. The REG_ATOI
+debugging interface has changed a bit. And there has been considerable
+internal cleanup of various kinds.
+
+New in alpha2.3: Split change list out of README, and moved flags notes
+into Makefile. Macro-ized the name of regex(7) in regex(3), since it has
+to change for 4.4BSD. Cleanup work in engine.c, and some new regression
+tests to catch tricky cases thereof.
+
+New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two
+small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
+in my own test program and might be useful to others for similar purposes.
+The regression test will now compile (and run) without REDEBUG. The
+BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now.
+Char/uchar parameters are now written int/unsigned, to avoid possible
+portability problems with unpromoted parameters. Some unsigned casts have
+been introduced to minimize portability problems with shifting into sign
+bits.
+
+New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big
+thing is that regex.h is now generated, using mkh, rather than being
+supplied in the distribution; due to circularities in dependencies,
+you have to build regex.h explicitly by "make h". The two known bugs
+have been fixed (and the regression test now checks for them), as has a
+problem with assertions not being suppressed in the absence of REDEBUG.
+No performance work yet.
+
+New in alpha2: Backslash-anything is an ordinary character, not an
+error (except, of course, for the handful of backslashed metacharacters
+in BREs), which should reduce script breakage. The regression test
+checks *where* null strings are supposed to match, and has generally
+been tightened up somewhat. Small bug fixes in parameter passing (not
+harmful, but technically errors) and some other areas. Debugging
+invoked by defining REDEBUG rather than not defining NDEBUG.
+
+New in alpha+3: full prototyping for internal routines, using a little
+helper program, mkh, which extracts prototypes given in stylized comments.
+More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple
+pre-screening of input when a literal string is known to be part of the
+RE; this does wonders for performance.
+
+New in alpha+2: minor bits of cleanup. Notably, the number "32" for the
+word width isn't hardwired into regexec.c any more, the public header
+file prototypes the functions if __STDC__ is defined, and some small typos
+in the manpages have been fixed.
+
+New in alpha+1: improvements to the manual pages, and an important
+extension, the REG_STARTEND option to regexec().
diff --git a/regex/cclass.h b/regex/cclass.h
new file mode 100644
index 0000000..d892103
--- /dev/null
+++ b/regex/cclass.h
@@ -0,0 +1,27 @@
+/* character-class table */
+static struct cclass {
+ char *name;
+ char *chars;
+ char *multis;
+} cclasses[] = {
+ {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ""},
+ {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ ""},
+ {"blank", " \t", ""},
+ {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", ""},
+ {"digit", "0123456789", ""},
+ {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""},
+ {"lower", "abcdefghijklmnopqrstuvwxyz",
+ ""},
+ {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
+ ""},
+ {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ ""},
+ {"space", "\t\n\v\f\r ", ""},
+ {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ ""},
+ {"xdigit", "0123456789ABCDEFabcdef",
+ ""},
+ {NULL, 0, ""}
+};
diff --git a/regex/cname.h b/regex/cname.h
new file mode 100644
index 0000000..a4af252
--- /dev/null
+++ b/regex/cname.h
@@ -0,0 +1,102 @@
+/* character-name table */
+static struct cname {
+ char *name;
+ char code;
+} cnames[] = {
+ {"NUL", '\0'},
+ {"SOH", '\001'},
+ {"STX", '\002'},
+ {"ETX", '\003'},
+ {"EOT", '\004'},
+ {"ENQ", '\005'},
+ {"ACK", '\006'},
+ {"BEL", '\007'},
+ {"alert", '\007'},
+ {"BS", '\010'},
+ {"backspace", '\b'},
+ {"HT", '\011'},
+ {"tab", '\t'},
+ {"LF", '\012'},
+ {"newline", '\n'},
+ {"VT", '\013'},
+ {"vertical-tab", '\v'},
+ {"FF", '\014'},
+ {"form-feed", '\f'},
+ {"CR", '\015'},
+ {"carriage-return", '\r'},
+ {"SO", '\016'},
+ {"SI", '\017'},
+ {"DLE", '\020'},
+ {"DC1", '\021'},
+ {"DC2", '\022'},
+ {"DC3", '\023'},
+ {"DC4", '\024'},
+ {"NAK", '\025'},
+ {"SYN", '\026'},
+ {"ETB", '\027'},
+ {"CAN", '\030'},
+ {"EM", '\031'},
+ {"SUB", '\032'},
+ {"ESC", '\033'},
+ {"IS4", '\034'},
+ {"FS", '\034'},
+ {"IS3", '\035'},
+ {"GS", '\035'},
+ {"IS2", '\036'},
+ {"RS", '\036'},
+ {"IS1", '\037'},
+ {"US", '\037'},
+ {"space", ' '},
+ {"exclamation-mark", '!'},
+ {"quotation-mark", '"'},
+ {"number-sign", '#'},
+ {"dollar-sign", '$'},
+ {"percent-sign", '%'},
+ {"ampersand", '&'},
+ {"apostrophe", '\''},
+ {"left-parenthesis", '('},
+ {"right-parenthesis", ')'},
+ {"asterisk", '*'},
+ {"plus-sign", '+'},
+ {"comma", ','},
+ {"hyphen", '-'},
+ {"hyphen-minus", '-'},
+ {"period", '.'},
+ {"full-stop", '.'},
+ {"slash", '/'},
+ {"solidus", '/'},
+ {"zero", '0'},
+ {"one", '1'},
+ {"two", '2'},
+ {"three", '3'},
+ {"four", '4'},
+ {"five", '5'},
+ {"six", '6'},
+ {"seven", '7'},
+ {"eight", '8'},
+ {"nine", '9'},
+ {"colon", ':'},
+ {"semicolon", ';'},
+ {"less-than-sign", '<'},
+ {"equals-sign", '='},
+ {"greater-than-sign", '>'},
+ {"question-mark", '?'},
+ {"commercial-at", '@'},
+ {"left-square-bracket", '['},
+ {"backslash", '\\'},
+ {"reverse-solidus", '\\'},
+ {"right-square-bracket", ']'},
+ {"circumflex", '^'},
+ {"circumflex-accent", '^'},
+ {"underscore", '_'},
+ {"low-line", '_'},
+ {"grave-accent", '`'},
+ {"left-brace", '{'},
+ {"left-curly-bracket", '{'},
+ {"vertical-line", '|'},
+ {"right-brace", '}'},
+ {"right-curly-bracket", '}'},
+ {"tilde", '~'},
+ {"DEL", '\177'},
+ {NULL, 0}
+};
diff --git a/regex/debug.c b/regex/debug.c
new file mode 100644
index 0000000..99ce7da
--- /dev/null
+++ b/regex/debug.c
@@ -0,0 +1,242 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <regex.h>
+
+#include "utils.h"
+#include "regex2.h"
+#include "debug.ih"
+
+/*
+ - regprint - print a regexp for debugging
+ == void regprint(regex_t *r, FILE *d);
+ */
+void
+regprint(r, d)
+regex_t *r;
+FILE *d;
+{
+ register struct re_guts *g = r->re_g;
+ register int i;
+ register int c;
+ register int last;
+ int nincat[NC];
+
+ fprintf(d, "%ld states, %d categories", (long)g->nstates,
+ g->ncategories);
+ fprintf(d, ", first %ld last %ld", (long)g->firststate,
+ (long)g->laststate);
+ if (g->iflags&USEBOL)
+ fprintf(d, ", USEBOL");
+ if (g->iflags&USEEOL)
+ fprintf(d, ", USEEOL");
+ if (g->iflags&BAD)
+ fprintf(d, ", BAD");
+ if (g->nsub > 0)
+ fprintf(d, ", nsub=%ld", (long)g->nsub);
+ if (g->must != NULL)
+ fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
+ g->must);
+ if (g->backrefs)
+ fprintf(d, ", backrefs");
+ if (g->nplus > 0)
+ fprintf(d, ", nplus %ld", (long)g->nplus);
+ fprintf(d, "\n");
+ s_print(g, d);
+ for (i = 0; i < g->ncategories; i++) {
+ nincat[i] = 0;
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (g->categories[c] == i)
+ nincat[i]++;
+ }
+ fprintf(d, "cc0#%d", nincat[0]);
+ for (i = 1; i < g->ncategories; i++)
+ if (nincat[i] == 1) {
+ for (c = CHAR_MIN; c <= CHAR_MAX; c++)
+ if (g->categories[c] == i)
+ break;
+ fprintf(d, ", %d=%s", i, regchar(c));
+ }
+ fprintf(d, "\n");
+ for (i = 1; i < g->ncategories; i++)
+ if (nincat[i] != 1) {
+ fprintf(d, "cc%d\t", i);
+ last = -1;
+ for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
+ if (c <= CHAR_MAX && g->categories[c] == i) {
+ if (last < 0) {
+ fprintf(d, "%s", regchar(c));
+ last = c;
+ }
+ } else {
+ if (last >= 0) {
+ if (last != c-1)
+ fprintf(d, "-%s",
+ regchar(c-1));
+ last = -1;
+ }
+ }
+ fprintf(d, "\n");
+ }
+}
+
+/*
+ - s_print - print the strip for debugging
+ == static void s_print(register struct re_guts *g, FILE *d);
+ */
+static void
+s_print(g, d)
+register struct re_guts *g;
+FILE *d;
+{
+ register sop *s;
+ register cset *cs;
+ register int i;
+ register int done = 0;
+ register sop opnd;
+ register int col = 0;
+ register int last;
+ register sopno offset = 2;
+# define GAP() { if (offset % 5 == 0) { \
+ if (col > 40) { \
+ fprintf(d, "\n\t"); \
+ col = 0; \
+ } else { \
+ fprintf(d, " "); \
+ col++; \
+ } \
+ } else \
+ col++; \
+ offset++; \
+ }
+
+ if (OP(g->strip[0]) != OEND)
+ fprintf(d, "missing initial OEND!\n");
+ for (s = &g->strip[1]; !done; s++) {
+ opnd = OPND(*s);
+ switch (OP(*s)) {
+ case OEND:
+ fprintf(d, "\n");
+ done = 1;
+ break;
+ case OCHAR:
+ if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
+ fprintf(d, "\\%c", (char)opnd);
+ else
+ fprintf(d, "%s", regchar((char)opnd));
+ break;
+ case OBOL:
+ fprintf(d, "^");
+ break;
+ case OEOL:
+ fprintf(d, "$");
+ break;
+ case OBOW:
+ fprintf(d, "\\{");
+ break;
+ case OEOW:
+ fprintf(d, "\\}");
+ break;
+ case OANY:
+ fprintf(d, ".");
+ break;
+ case OANYOF:
+ fprintf(d, "[(%ld)", (long)opnd);
+ cs = &g->sets[opnd];
+ last = -1;
+ for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
+ if (CHIN(cs, i) && i < g->csetsize) {
+ if (last < 0) {
+ fprintf(d, "%s", regchar(i));
+ last = i;
+ }
+ } else {
+ if (last >= 0) {
+ if (last != i-1)
+ fprintf(d, "-%s",
+ regchar(i-1));
+ last = -1;
+ }
+ }
+ fprintf(d, "]");
+ break;
+ case OBACK_:
+ fprintf(d, "(\\<%ld>", (long)opnd);
+ break;
+ case O_BACK:
+ fprintf(d, "<%ld>\\)", (long)opnd);
+ break;
+ case OPLUS_:
+ fprintf(d, "(+");
+ if (OP(*(s+opnd)) != O_PLUS)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_PLUS:
+ if (OP(*(s-opnd)) != OPLUS_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "+)");
+ break;
+ case OQUEST_:
+ fprintf(d, "(?");
+ if (OP(*(s+opnd)) != O_QUEST)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_QUEST:
+ if (OP(*(s-opnd)) != OQUEST_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "?)");
+ break;
+ case OLPAREN:
+ fprintf(d, "((<%ld>", (long)opnd);
+ break;
+ case ORPAREN:
+ fprintf(d, "<%ld>))", (long)opnd);
+ break;
+ case OCH_:
+ fprintf(d, "<");
+ if (OP(*(s+opnd)) != OOR2)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case OOR1:
+ if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, "|");
+ break;
+ case OOR2:
+ fprintf(d, "|");
+ if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
+ fprintf(d, "<%ld>", (long)opnd);
+ break;
+ case O_CH:
+ if (OP(*(s-opnd)) != OOR1)
+ fprintf(d, "<%ld>", (long)opnd);
+ fprintf(d, ">");
+ break;
+ default:
+ fprintf(d, "!%d(%d)!", OP(*s), opnd);
+ break;
+ }
+ if (!done)
+ GAP();
+ }
+}
+
+/*
+ - regchar - make a character printable
+ == static char *regchar(int ch);
+ */
+static char * /* -> representation */
+regchar(ch)
+int ch;
+{
+ static char buf[10];
+
+ if (isprint(ch) || ch == ' ')
+ sprintf(buf, "%c", ch);
+ else
+ sprintf(buf, "\\%o", ch);
+ return(buf);
+}
diff --git a/regex/debug.ih b/regex/debug.ih
new file mode 100644
index 0000000..5f40ff7
--- /dev/null
+++ b/regex/debug.ih
@@ -0,0 +1,14 @@
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* === debug.c === */
+void regprint(regex_t *r, FILE *d);
+static void s_print(register struct re_guts *g, FILE *d);
+static char *regchar(int ch);
+
+#ifdef __cplusplus
+}
+#endif
+/* ========= end header generated by ./mkh ========= */
diff --git a/regex/engine.c b/regex/engine.c
new file mode 100644
index 0000000..919fe3f
--- /dev/null
+++ b/regex/engine.c
@@ -0,0 +1,1019 @@
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+ struct re_guts *g;
+ int eflags;
+ regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+ char *offp; /* offsets work from here */
+ char *beginp; /* start of string -- virtual NUL precedes */
+ char *endp; /* end of string -- virtual NUL here */
+ char *coldp; /* can be no match starting before here */
+ char **lastpos; /* [nplus+1] */
+ STATEVARS;
+ states st; /* current states */
+ states fresh; /* states for a fresh start */
+ states tmp; /* temporary */
+ states empty; /* empty set of states */
+};
+
+#include "engine.ih"
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ == static int matcher(register struct re_guts *g, char *string, \
+ == size_t nmatch, regmatch_t pmatch[], int eflags);
+ */
+static int /* 0 success, REG_NOMATCH failure */
+matcher(g, string, nmatch, pmatch, eflags)
+register struct re_guts *g;
+char *string;
+size_t nmatch;
+regmatch_t pmatch[];
+int eflags;
+{
+ register char *endp;
+ register int i;
+ struct match mv;
+ register struct match *m = &mv;
+ register char *dp;
+ const register sopno gf = g->firststate+1; /* +1 for OEND */
+ const register sopno gl = g->laststate;
+ char *start;
+ char *stop;
+
+ /* simplify the situation where possible */
+ if (g->cflags&REG_NOSUB)
+ nmatch = 0;
+ if (eflags&REG_STARTEND) {
+ start = string + pmatch[0].rm_so;
+ stop = string + pmatch[0].rm_eo;
+ } else {
+ start = string;
+ stop = start + strlen(start);
+ }
+ if (stop < start)
+ return(REG_INVARG);
+
+ /* prescreening; this does wonders for this rather slow code */
+ if (g->must != NULL) {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] && stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
+
+ /* match struct setup */
+ m->g = g;
+ m->eflags = eflags;
+ m->pmatch = NULL;
+ m->lastpos = NULL;
+ m->offp = string;
+ m->beginp = start;
+ m->endp = stop;
+ STATESETUP(m, 4);
+ SETUP(m->st);
+ SETUP(m->fresh);
+ SETUP(m->tmp);
+ SETUP(m->empty);
+ CLEAR(m->empty);
+
+ /* this loop does only one repetition except for backrefs */
+ for (;;) {
+ endp = fast(m, start, stop, gf, gl);
+ if (endp == NULL) { /* a miss */
+ STATETEARDOWN(m);
+ return(REG_NOMATCH);
+ }
+ if (nmatch == 0 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* where? */
+ assert(m->coldp != NULL);
+ for (;;) {
+ NOTE("finding start");
+ endp = slow(m, m->coldp, stop, gf, gl);
+ if (endp != NULL)
+ break;
+ assert(m->coldp < m->endp);
+ m->coldp++;
+ }
+ if (nmatch == 1 && !g->backrefs)
+ break; /* no further info needed */
+
+ /* oh my, he wants the subexpressions... */
+ if (m->pmatch == NULL)
+ m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
+ sizeof(regmatch_t));
+ if (m->pmatch == NULL) {
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ for (i = 1; i <= m->g->nsub; i++)
+ m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+ if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+ NOTE("dissecting");
+ dp = dissect(m, m->coldp, endp, gf, gl);
+ } else {
+ if (g->nplus > 0 && m->lastpos == NULL)
+ m->lastpos = (char **)malloc((g->nplus+1) *
+ sizeof(char *));
+ if (g->nplus > 0 && m->lastpos == NULL) {
+ free(m->pmatch);
+ STATETEARDOWN(m);
+ return(REG_ESPACE);
+ }
+ NOTE("backref dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ }
+ if (dp != NULL)
+ break;
+
+ /* uh-oh... we couldn't find a subexpression-level match */
+ assert(g->backrefs); /* must be back references doing it */
+ assert(g->nplus == 0 || m->lastpos != NULL);
+ for (;;) {
+ if (dp != NULL || endp <= m->coldp)
+ break; /* defeat */
+ NOTE("backoff");
+ endp = slow(m, m->coldp, endp-1, gf, gl);
+ if (endp == NULL)
+ break; /* defeat */
+ /* try it on a shorter possibility */
+#ifndef NDEBUG
+ for (i = 1; i <= m->g->nsub; i++) {
+ assert(m->pmatch[i].rm_so == -1);
+ assert(m->pmatch[i].rm_eo == -1);
+ }
+#endif
+ NOTE("backoff dissect");
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ }
+ assert(dp == NULL || dp == endp);
+ if (dp != NULL) /* found a shorter one */
+ break;
+
+ /* despite initial appearances, there is no match here */
+ NOTE("false alarm");
+ start = m->coldp + 1; /* recycle starting later */
+ assert(start <= stop);
+ }
+
+ /* fill in the details if requested */
+ if (nmatch > 0) {
+ pmatch[0].rm_so = m->coldp - m->offp;
+ pmatch[0].rm_eo = endp - m->offp;
+ }
+ if (nmatch > 1) {
+ assert(m->pmatch != NULL);
+ for (i = 1; i < nmatch; i++)
+ if (i <= m->g->nsub)
+ pmatch[i] = m->pmatch[i];
+ else {
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+ }
+
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
+ STATETEARDOWN(m);
+ return(0);
+}
+
+/*
+ - dissect - figure out what matched what, no back references
+ == static char *dissect(register struct match *m, char *start, \
+ == char *stop, sopno startst, sopno stopst);
+ */
+static char * /* == stop (success) always */
+dissect(m, start, stop, startst, stopst)
+register struct match *m;
+char *start;
+char *stop;
+sopno startst;
+sopno stopst;
+{
+ register int i;
+ register sopno ss; /* start sop of current subRE */
+ register sopno es; /* end sop of current subRE */
+ register char *sp; /* start of string matched by it */
+ register char *stp; /* string matched by it cannot pass here */
+ register char *rest; /* start of rest of string */
+ register char *tail; /* string unmatched by rest of RE */
+ register sopno ssub; /* start sop of subsubRE */
+ register sopno esub; /* end sop of subsubRE */
+ register char *ssp; /* start of string matched by subsubRE */
+ register char *sep; /* end of string matched by subsubRE */
+ register char *oldssp; /* previous ssp */
+ register char *dp;
+
+ AT("diss", start, stop, startst, stopst);
+ sp =