aboutsummaryrefslogtreecommitdiff
path: root/lib/Support/regcomp.c
diff options
context:
space:
mode:
authorEli Bendersky <eliben@google.com>2012-11-28 19:00:02 +0000
committerEli Bendersky <eliben@google.com>2012-11-28 19:00:02 +0000
commit6b731486d4460e5f1088a6066c0081af048c1e45 (patch)
treea6abf2bd0d5983a7715a3d1538189ee73758fa20 /lib/Support/regcomp.c
parentbacc82548ae0ddddeb99f79fc01641a08133c550 (diff)
Add backreference matching capabilities to Support/Regex, with
appropriate unit tests. This change in itself is not expected to affect any functionality at this point, but it will serve as a stepping stone to improve FileCheck's variable matching capabilities. Luckily, our regex implementation already supports backreferences, although a bit of hacking is required to enable it. It supports both Basic Regular Expressions (BREs) and Extended Regular Expressions (EREs), without supporting backrefs for EREs, following POSIX strictly in this respect. And EREs is what we actually use (rightly). This is contrary to many implementations (including the default on Linux) of POSIX regexes, that do allow backrefs in EREs. Adding backref support to our EREs is a very simple change in the regcomp parsing code. I fail to think of significant cases where it would clash with existing things, and can bring more versatility to the regexes we write. There's always the danger of a backref in a specially crafted regex causing exponential matching times, but since we mainly use them for testing purposes I don't think it's a big problem. [it can also be placed behind a flag specific to FileCheck, if needed]. For more details, see: * http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-November/055840.html * http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20121126/156878.html git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168802 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Support/regcomp.c')
-rw-r--r--lib/Support/regcomp.c30
1 files changed, 29 insertions, 1 deletions
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
index 46c91a9c49..74d9186aaa 100644
--- a/lib/Support/regcomp.c
+++ b/lib/Support/regcomp.c
@@ -303,6 +303,7 @@ p_ere_exp(struct parse *p)
sopno pos;
int count;
int count2;
+ int backrefnum;
sopno subno;
int wascaret = 0;
@@ -370,7 +371,34 @@ p_ere_exp(struct parse *p)
case '\\':
REQUIRE(MORE(), REG_EESCAPE);
c = GETNEXT();
- ordinary(p, c);
+ if (c >= '1' && c <= '9') {
+ /* \[0-9] is taken to be a back-reference to a previously specified
+ * matching group. backrefnum will hold the number. The matching
+ * group must exist (i.e. if \4 is found there must have been at
+ * least 4 matching groups specified in the pattern previously).
+ */
+ backrefnum = c - '0';
+ if (p->pend[backrefnum] == 0) {
+ SETERROR(REG_ESUBREG);
+ break;
+ }
+
+ /* Make sure everything checks out and emit the sequence
+ * that marks a back-reference to the parse structure.
+ */
+ assert(backrefnum <= p->g->nsub);
+ EMIT(OBACK_, backrefnum);
+ assert(p->pbegin[backrefnum] != 0);
+ assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN);
+ assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN);
+ (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]);
+ EMIT(O_BACK, backrefnum);
+ p->g->backrefs = 1;
+ } else {
+ /* Other chars are simply themselves when escaped with a backslash.
+ */
+ ordinary(p, c);
+ }
break;
case '{': /* okay as ordinary except if digit follows */
REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);