aboutsummaryrefslogtreecommitdiff
path: root/system/lib
diff options
context:
space:
mode:
Diffstat (limited to 'system/lib')
-rw-r--r--system/lib/libc/musl/src/regex/regcomp.c3352
-rw-r--r--system/lib/libc/musl/src/regex/regerror.c35
-rw-r--r--system/lib/libc/musl/src/regex/regexec.c1011
-rw-r--r--system/lib/libc/musl/src/regex/tre-mem.c158
-rw-r--r--system/lib/libc/musl/src/regex/tre.h231
-rw-r--r--system/lib/libcextra.symbols7
6 files changed, 4794 insertions, 0 deletions
diff --git a/system/lib/libc/musl/src/regex/regcomp.c b/system/lib/libc/musl/src/regex/regcomp.c
new file mode 100644
index 00000000..5cedfd52
--- /dev/null
+++ b/system/lib/libc/musl/src/regex/regcomp.c
@@ -0,0 +1,3352 @@
+/*
+ regcomp.c - TRE POSIX compatible regex compilation functions.
+
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <regex.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "tre.h"
+
+#include <assert.h>
+
+/***********************************************************************
+ from tre-compile.h
+***********************************************************************/
+
+typedef struct {
+ int position;
+ int code_min;
+ int code_max;
+ int *tags;
+ int assertions;
+ tre_ctype_t class;
+ tre_ctype_t *neg_classes;
+ int backref;
+} tre_pos_and_tags_t;
+
+
+/***********************************************************************
+ from tre-ast.c and tre-ast.h
+***********************************************************************/
+
+/* The different AST node types. */
+typedef enum {
+ LITERAL,
+ CATENATION,
+ ITERATION,
+ UNION
+} tre_ast_type_t;
+
+/* Special subtypes of TRE_LITERAL. */
+#define EMPTY -1 /* Empty leaf (denotes empty string). */
+#define ASSERTION -2 /* Assertion leaf. */
+#define TAG -3 /* Tag leaf. */
+#define BACKREF -4 /* Back reference leaf. */
+
+#define IS_SPECIAL(x) ((x)->code_min < 0)
+#define IS_EMPTY(x) ((x)->code_min == EMPTY)
+#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
+#define IS_TAG(x) ((x)->code_min == TAG)
+#define IS_BACKREF(x) ((x)->code_min == BACKREF)
+
+
+/* A generic AST node. All AST nodes consist of this node on the top
+ level with `obj' pointing to the actual content. */
+typedef struct {
+ tre_ast_type_t type; /* Type of the node. */
+ void *obj; /* Pointer to actual node. */
+ int nullable;
+ int submatch_id;
+ int num_submatches;
+ int num_tags;
+ tre_pos_and_tags_t *firstpos;
+ tre_pos_and_tags_t *lastpos;
+} tre_ast_node_t;
+
+
+/* A "literal" node. These are created for assertions, back references,
+ tags, matching parameter settings, and all expressions that match one
+ character. */
+typedef struct {
+ long code_min;
+ long code_max;
+ int position;
+ tre_ctype_t class;
+ tre_ctype_t *neg_classes;
+} tre_literal_t;
+
+/* A "catenation" node. These are created when two regexps are concatenated.
+ If there are more than one subexpressions in sequence, the `left' part
+ holds all but the last, and `right' part holds the last subexpression
+ (catenation is left associative). */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_catenation_t;
+
+/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
+ operators. */
+typedef struct {
+ /* Subexpression to match. */
+ tre_ast_node_t *arg;
+ /* Minimum number of consecutive matches. */
+ int min;
+ /* Maximum number of consecutive matches. */
+ int max;
+ /* If 0, match as many characters as possible, if 1 match as few as
+ possible. Note that this does not always mean the same thing as
+ matching as many/few repetitions as possible. */
+ unsigned int minimal:1;
+} tre_iteration_t;
+
+/* An "union" node. These are created for the "|" operator. */
+typedef struct {
+ tre_ast_node_t *left;
+ tre_ast_node_t *right;
+} tre_union_t;
+
+static tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
+
+static tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
+
+static tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+ int minimal);
+
+static tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
+
+static tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+ tre_ast_node_t *right);
+
+
+static tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
+{
+ tre_ast_node_t *node;
+
+ node = tre_mem_calloc(mem, sizeof(*node));
+ if (!node)
+ return NULL;
+ node->obj = tre_mem_calloc(mem, size);
+ if (!node->obj)
+ return NULL;
+ node->type = type;
+ node->nullable = -1;
+ node->submatch_id = -1;
+
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
+{
+ tre_ast_node_t *node;
+ tre_literal_t *lit;
+
+ node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
+ if (!node)
+ return NULL;
+ lit = node->obj;
+ lit->code_min = code_min;
+ lit->code_max = code_max;
+ lit->position = position;
+
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+ int minimal)
+{
+ tre_ast_node_t *node;
+ tre_iteration_t *iter;
+
+ node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
+ if (!node)
+ return NULL;
+ iter = node->obj;
+ iter->arg = arg;
+ iter->min = min;
+ iter->max = max;
+ iter->minimal = minimal;
+ node->num_submatches = arg->num_submatches;
+
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
+{
+ tre_ast_node_t *node;
+
+ node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
+ if (node == NULL)
+ return NULL;
+ ((tre_union_t *)node->obj)->left = left;
+ ((tre_union_t *)node->obj)->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+
+ return node;
+}
+
+static tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+ tre_ast_node_t *right)
+{
+ tre_ast_node_t *node;
+
+ node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
+ if (node == NULL)
+ return NULL;
+ ((tre_catenation_t *)node->obj)->left = left;
+ ((tre_catenation_t *)node->obj)->right = right;
+ node->num_submatches = left->num_submatches + right->num_submatches;
+
+ return node;
+}
+
+
+/***********************************************************************
+ from tre-stack.c and tre-stack.h
+***********************************************************************/
+
+typedef struct tre_stack_rec tre_stack_t;
+
+/* Creates a new stack object. `size' is initial size in bytes, `max_size'
+ is maximum size, and `increment' specifies how much more space will be
+ allocated with realloc() if all space gets used up. Returns the stack
+ object or NULL if out of memory. */
+static tre_stack_t *
+tre_stack_new(int size, int max_size, int increment);
+
+/* Frees the stack object. */
+static void
+tre_stack_destroy(tre_stack_t *s);
+
+/* Returns the current number of objects in the stack. */
+static int
+tre_stack_num_objects(tre_stack_t *s);
+
+/* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
+ `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
+ This tries to realloc() more space before failing if maximum size
+ has not yet been reached. Returns REG_OK if successful. */
+#define declare_pushf(typetag, type) \
+ static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
+
+declare_pushf(voidptr, void *);
+declare_pushf(int, int);
+
+/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
+ element off of stack `s' and returns it. The stack must not be
+ empty. */
+#define declare_popf(typetag, type) \
+ static type tre_stack_pop_ ## typetag(tre_stack_t *s)
+
+declare_popf(voidptr, void *);
+declare_popf(int, int);
+
+/* Just to save some typing. */
+#define STACK_PUSH(s, typetag, value) \
+ do \
+ { \
+ status = tre_stack_push_ ## typetag(s, value); \
+ } \
+ while (/*CONSTCOND*/0)
+
+#define STACK_PUSHX(s, typetag, value) \
+ { \
+ status = tre_stack_push_ ## typetag(s, value); \
+ if (status != REG_OK) \
+ break; \
+ }
+
+#define STACK_PUSHR(s, typetag, value) \
+ { \
+ reg_errcode_t _status; \
+ _status = tre_stack_push_ ## typetag(s, value); \
+ if (_status != REG_OK) \
+ return _status; \
+ }
+
+union tre_stack_item {
+ void *voidptr_value;
+ int int_value;
+};
+
+struct tre_stack_rec {
+ int size;
+ int max_size;
+ int increment;
+ int ptr;
+ union tre_stack_item *stack;
+};
+
+
+static tre_stack_t *
+tre_stack_new(int size, int max_size, int increment)
+{
+ tre_stack_t *s;
+
+ s = xmalloc(sizeof(*s));
+ if (s != NULL)
+ {
+ s->stack = xmalloc(sizeof(*s->stack) * size);
+ if (s->stack == NULL)
+ {
+ xfree(s);
+ return NULL;
+ }
+ s->size = size;
+ s->max_size = max_size;
+ s->increment = increment;
+ s->ptr = 0;
+ }
+ return s;
+}
+
+static void
+tre_stack_destroy(tre_stack_t *s)
+{
+ xfree(s->stack);
+ xfree(s);
+}
+
+static int
+tre_stack_num_objects(tre_stack_t *s)
+{
+ return s->ptr;
+}
+
+static reg_errcode_t
+tre_stack_push(tre_stack_t *s, union tre_stack_item value)
+{
+ if (s->ptr < s->size)
+ {
+ s->stack[s->ptr] = value;
+ s->ptr++;
+ }
+ else
+ {
+ if (s->size >= s->max_size)
+ {
+ return REG_ESPACE;
+ }
+ else
+ {
+ union tre_stack_item *new_buffer;
+ int new_size;
+ new_size = s->size + s->increment;
+ if (new_size > s->max_size)
+ new_size = s->max_size;
+ new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
+ if (new_buffer == NULL)
+ {
+ return REG_ESPACE;
+ }
+ assert(new_size > s->size);
+ s->size = new_size;
+ s->stack = new_buffer;
+ tre_stack_push(s, value);
+ }
+ }
+ return REG_OK;
+}
+
+#define define_pushf(typetag, type) \
+ declare_pushf(typetag, type) { \
+ union tre_stack_item item; \
+ item.typetag ## _value = value; \
+ return tre_stack_push(s, item); \
+}
+
+define_pushf(int, int)
+define_pushf(voidptr, void *)
+
+#define define_popf(typetag, type) \
+ declare_popf(typetag, type) { \
+ return s->stack[--s->ptr].typetag ## _value; \
+ }
+
+define_popf(int, int)
+define_popf(voidptr, void *)
+
+
+/***********************************************************************
+ from tre-parse.c and tre-parse.h
+***********************************************************************/
+
+/* Parse context. */
+typedef struct {
+ /* Memory allocator. The AST is allocated using this. */
+ tre_mem_t mem;
+ /* Stack used for keeping track of regexp syntax. */
+ tre_stack_t *stack;
+ /* The parse result. */
+ tre_ast_node_t *result;
+ /* The regexp to parse and its length. */
+ const char *re;
+ /* The first character of the entire regexp. */
+ const char *re_start;
+ /* Current submatch ID. */
+ int submatch_id;
+ /* Current position (number of literal). */
+ int position;
+ /* The highest back reference or -1 if none seen so far. */
+ int max_backref;
+ /* This flag is set if the regexp uses approximate matching. */
+ int have_approx;
+ /* Compilation flags. */
+ int cflags;
+ /* If this flag is set the top-level submatch is not captured. */
+ int nofirstsub;
+} tre_parse_ctx_t;
+
+/* Parses a wide character regexp pattern into a syntax tree. This parser
+ handles both syntaxes (BRE and ERE), including the TRE extensions. */
+static reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx);
+
+
+/*
+ This parser is just a simple recursive descent parser for POSIX.2
+ regexps. The parser supports both the obsolete default syntax and
+ the "extended" syntax, and some nonstandard extensions.
+*/
+
+/* Characters with special meanings in regexp syntax. */
+#define CHAR_PIPE '|'
+#define CHAR_LPAREN '('
+#define CHAR_RPAREN ')'
+#define CHAR_LBRACE '{'
+#define CHAR_RBRACE '}'
+#define CHAR_LBRACKET '['
+#define CHAR_RBRACKET ']'
+#define CHAR_MINUS '-'
+#define CHAR_STAR '*'
+#define CHAR_QUESTIONMARK '?'
+#define CHAR_PLUS '+'
+#define CHAR_PERIOD '.'
+#define CHAR_COLON ':'
+#define CHAR_EQUAL '='
+#define CHAR_COMMA ','
+#define CHAR_CARET '^'
+#define CHAR_DOLLAR '$'
+#define CHAR_BACKSLASH '\\'
+#define CHAR_HASH '#'
+#define CHAR_TILDE '~'
+
+
+/* Some macros for expanding \w, \s, etc. */
+static const struct tre_macro_struct {
+ const char c;
+ const char *expansion;
+} tre_macros[] =
+ { {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
+ {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
+ {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
+ {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
+ { 0, NULL }
+ };
+
+
+/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
+ must have at least `len' items. Sets buf[0] to zero if the there
+ is no match in `tre_macros'. */
+static const char *
+tre_expand_macro(const char *regex)
+{
+ int i;
+
+ if (!*regex)
+ return 0;
+
+ for (i = 0; tre_macros[i].expansion && tre_macros[i].c != *regex; i++);
+ return tre_macros[i].expansion;
+}
+
+static reg_errcode_t
+tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
+ tre_ast_node_t ***items)
+{
+ reg_errcode_t status;
+ tre_ast_node_t **array = *items;
+ /* Allocate more space if necessary. */
+ if (*i >= *max_i)
+ {
+ tre_ast_node_t **new_items;
+ /* If the array is already 1024 items large, give up -- there's
+ probably an error in the regexp (e.g. not a '\0' terminated
+ string and missing ']') */
+ if (*max_i > 1024)
+ return REG_ESPACE;
+ *max_i *= 2;
+ new_items = xrealloc(array, sizeof(*items) * *max_i);
+ if (new_items == NULL)
+ return REG_ESPACE;
+ *items = array = new_items;
+ }
+ array[*i] = tre_ast_new_literal(mem, min, max, -1);
+ status = array[*i] == NULL ? REG_ESPACE : REG_OK;
+ (*i)++;
+ return status;
+}
+
+
+static int
+tre_compare_items(const void *a, const void *b)
+{
+ const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
+ const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
+ tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
+ int a_min = l_a->code_min, b_min = l_b->code_min;
+
+ if (a_min < b_min)
+ return -1;
+ else if (a_min > b_min)
+ return 1;
+ else
+ return 0;
+}
+
+/* Maximum number of character classes that can occur in a negated bracket
+ expression. */
+#define MAX_NEG_CLASSES 64
+
+/* Maximum length of character class names. */
+#define MAX_CLASS_NAME
+
+static reg_errcode_t
+tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
+ tre_ctype_t neg_classes[], int *num_neg_classes,
+ tre_ast_node_t ***items, int *num_items,
+ int *items_size)
+{
+ const char *re = ctx->re;
+ reg_errcode_t status = REG_OK;
+ tre_ctype_t class = (tre_ctype_t)0;
+ int i = *num_items;
+ int max_i = *items_size;
+ int skip;
+
+ /* Build an array of the items in the bracket expression. */
+ while (status == REG_OK)
+ {
+ skip = 0;
+ if (!*re)
+ {
+ status = REG_EBRACK;
+ }
+ else if (*re == CHAR_RBRACKET && re > ctx->re)
+ {
+ re++;
+ break;
+ }
+ else
+ {
+ tre_cint_t min = 0, max = 0;
+ wchar_t wc;
+ int clen = mbtowc(&wc, re, -1);
+
+ if (clen<0) clen=1, wc=WEOF;
+
+ class = (tre_ctype_t)0;
+ if (*(re + clen) == CHAR_MINUS && *(re + clen + 1) != CHAR_RBRACKET)
+ {
+ min = wc;
+ re += clen+1;
+ clen = mbtowc(&wc, re, -1);
+ if (clen<0) clen=1, wc=WEOF;
+ max = wc;
+ re += clen;
+ /* XXX - Should use collation order instead of encoding values
+ in character ranges. */
+ if (min > max)
+ status = REG_ERANGE;
+ }
+ else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
+ status = REG_ECOLLATE;
+ else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
+ status = REG_ECOLLATE;
+ else if (*re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
+ {
+ char tmp_str[64];
+ const char *endptr = re + 2;
+ int len;
+ while (*endptr && *endptr != CHAR_COLON)
+ endptr++;
+ if (*endptr)
+ {
+ len = MIN(endptr - re - 2, 63);
+ strncpy(tmp_str, re + 2, len);
+ tmp_str[len] = '\0';
+ class = tre_ctype(tmp_str);
+ if (!class)
+ status = REG_ECTYPE;
+ re = endptr + 2;
+ }
+ else
+ status = REG_ECTYPE;
+ min = 0;
+ max = TRE_CHAR_MAX;
+ }
+ else
+ {
+ if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET
+ && ctx->re != re)
+ /* Two ranges are not allowed to share and endpoint. */
+ status = REG_ERANGE;
+ min = max = wc;
+ re += clen;
+ }
+
+ if (status != REG_OK)
+ break;
+
+ if (class && negate)
+ if (*num_neg_classes >= MAX_NEG_CLASSES)
+ status = REG_ESPACE;
+ else
+ neg_classes[(*num_neg_classes)++] = class;
+ else if (!skip)
+ {
+ status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
+ if (status != REG_OK)
+ break;
+ ((tre_literal_t*)((*items)[i-1])->obj)->class = class;
+ }
+
+ /* Add opposite-case counterpoints if REG_ICASE is present.
+ This is broken if there are more than two "same" characters. */
+ if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
+ {
+ tre_cint_t cmin, ccurr;
+
+ while (min <= max)
+ {
+ if (tre_islower(min))
+ {
+ cmin = ccurr = tre_toupper(min++);
+ while (tre_islower(min) && tre_toupper(min) == ccurr + 1
+ && min <= max)
+ ccurr = tre_toupper(min++);
+ status = tre_new_item(ctx->mem, cmin, ccurr,
+ &i, &max_i, items);
+ }
+ else if (tre_isupper(min))
+ {
+ cmin = ccurr = tre_tolower(min++);
+ while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
+ && min <= max)
+ ccurr = tre_tolower(min++);
+ status = tre_new_item(ctx->mem, cmin, ccurr,
+ &i, &max_i, items);
+ }
+ else min++;
+ if (status != REG_OK)
+ break;
+ }
+ if (status != REG_OK)
+ break;
+ }
+ }
+ }
+ *num_items = i;
+ *items_size = max_i;
+ ctx->re = re;
+ return status;
+}
+
+static reg_errcode_t
+tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+ tre_ast_node_t *node = NULL;
+ int negate = 0;
+ reg_errcode_t status = REG_OK;
+ tre_ast_node_t **items, *u, *n;
+ int i = 0, j, max_i = 32, curr_max, curr_min;
+ tre_ctype_t neg_classes[MAX_NEG_CLASSES];
+ int num_neg_classes = 0;
+
+ /* Start off with an array of `max_i' elements. */
+ items = xmalloc(sizeof(*items) * max_i);
+ if (items == NULL)
+ return REG_ESPACE;
+
+ if (*ctx->re == CHAR_CARET)
+ {
+ negate = 1;
+ ctx->re++;
+ }
+
+ status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
+ &items, &i, &max_i);
+
+ if (status != REG_OK)
+ goto parse_bracket_done;
+
+ /* Sort the array if we need to negate it. */
+ if (negate)
+ qsort(items, (unsigned)i, sizeof(*items), tre_compare_items);
+
+ curr_max = curr_min = 0;
+ /* Build a union of the items in the array, negated if necessary. */
+ for (j = 0; j < i && status == REG_OK; j++)
+ {
+ int min, max;
+ tre_literal_t *l = items[j]->obj;
+ min = l->code_min;
+ max = l->code_max;
+
+ if (negate)
+ {
+ if (min < curr_max)
+ {
+ /* Overlap. */
+ curr_max = MAX(max + 1, curr_max);
+ l = NULL;
+ }
+ else
+ {
+ /* No overlap. */
+ curr_max = min - 1;
+ if (curr_max >= curr_min)
+ {
+ l->code_min = curr_min;
+ l->code_max = curr_max;
+ }
+ else
+ {
+ l = NULL;
+ }
+ curr_min = curr_max = max + 1;
+ }
+ }
+
+ if (l != NULL)
+ {
+ int k;
+ l->position = ctx->position;
+ if (num_neg_classes > 0)
+ {
+ l->neg_classes = tre_mem_alloc(ctx->mem,
+ (sizeof(l->neg_classes)
+ * (num_neg_classes + 1)));
+ if (l->neg_classes == NULL)
+ {
+ status = REG_ESPACE;
+ break;
+ }
+ for (k = 0; k < num_neg_classes; k++)
+ l->neg_classes[k] = neg_classes[k];
+ l->neg_classes[k] = (tre_ctype_t)0;
+ }
+ else
+ l->neg_classes = NULL;
+ if (node == NULL)
+ node = items[j];
+ else
+ {
+ u = tre_ast_new_union(ctx->mem, node, items[j]);
+ if (u == NULL)
+ status = REG_ESPACE;
+ node = u;
+ }
+ }
+ }
+
+ if (status != REG_OK)
+ goto parse_bracket_done;
+
+ if (negate)
+ {
+ int k;
+ n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
+ if (n == NULL)
+ status = REG_ESPACE;
+ else
+ {
+ tre_literal_t *l = n->obj;
+ if (num_neg_classes > 0)
+ {
+ l->neg_classes = tre_mem_alloc(ctx->mem,
+ (sizeof(l->neg_classes)
+ * (num_neg_classes + 1)));
+ if (l->neg_classes == NULL)
+ {
+ status = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ for (k = 0; k < num_neg_classes; k++)
+ l->neg_classes[k] = neg_classes[k];
+ l->neg_classes[k] = (tre_ctype_t)0;
+ }
+ else
+ l->neg_classes = NULL;
+ if (node == NULL)
+ node = n;
+ else
+ {
+ u = tre_ast_new_union(ctx->mem, node, n);
+ if (u == NULL)
+ status = REG_ESPACE;
+ node = u;
+ }
+ }
+ }
+
+ if (status != REG_OK)
+ goto parse_bracket_done;
+
+#ifdef TRE_DEBUG
+ tre_ast_print(node);
+#endif /* TRE_DEBUG */
+
+ parse_bracket_done:
+ xfree(items);
+ ctx->position++;
+ *result = node;
+ return status;
+}
+
+
+/* Parses a positive decimal integer. Returns -1 if the string does not
+ contain a valid number. */
+static int
+tre_parse_int(const char **regex)
+{
+ int num = -1;
+ const char *r = *regex;
+ while (*r-'0'<10U)
+ {
+ if (num < 0)
+ num = 0;
+ num = num * 10 + *r - '0';
+ r++;
+ }
+ *regex = r;
+ return num;
+}
+
+
+static reg_errcode_t
+tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+ int min, max;
+ const char *r = ctx->re;
+ int minimal = 0;
+
+ /* Parse number (minimum repetition count). */
+ min = -1;
+ if (*r >= '0' && *r <= '9') {
+ min = tre_parse_int(&r);
+ }
+
+ /* Parse comma and second number (maximum repetition count). */
+ max = min;
+ if (*r == CHAR_COMMA)
+ {
+ r++;
+ max = tre_parse_int(&r);
+ }
+
+ /* Check that the repeat counts are sane. */
+ if ((max >= 0 && min > max) || max > RE_DUP_MAX)
+ return REG_BADBR;
+
+ /* Missing }. */
+ if (!*r)
+ return REG_EBRACE;
+
+ /* Empty contents of {}. */
+ if (r == ctx->re)
+ return REG_BADBR;
+
+ /* Parse the ending '}' or '\}'.*/
+ if (ctx->cflags & REG_EXTENDED)
+ {
+ if (*r != CHAR_RBRACE)
+ return REG_BADBR;
+ r++;
+ }
+ else
+ {
+ if (*r != CHAR_BACKSLASH || *(r + 1) != CHAR_RBRACE)
+ return REG_BADBR;
+ r += 2;
+ }
+
+ /* Create the AST node(s). */
+ if (min == 0 && max == 0)
+ {
+ *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ if (*result == NULL)
+ return REG_ESPACE;
+ }
+ else
+ {
+ if (min < 0 && max < 0)
+ /* Only approximate parameters set, no repetitions. */
+ min = max = 1;
+
+ *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
+ if (!*result)
+ return REG_ESPACE;
+ }
+
+ ctx->re = r;
+ return REG_OK;
+}
+
+typedef enum {
+ PARSE_RE = 0,
+ PARSE_ATOM,
+ PARSE_MARK_FOR_SUBMATCH,
+ PARSE_BRANCH,
+ PARSE_PIECE,
+ PARSE_CATENATION,
+ PARSE_POST_CATENATION,
+ PARSE_UNION,
+ PARSE_POST_UNION,
+ PARSE_POSTFIX,
+ PARSE_RESTORE_CFLAGS
+} tre_parse_re_stack_symbol_t;
+
+
+static reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx)
+{
+ tre_ast_node_t *result = NULL;
+ tre_parse_re_stack_symbol_t symbol;
+ reg_errcode_t status = REG_OK;
+ tre_stack_t *stack = ctx->stack;
+ int bottom = tre_stack_num_objects(stack);
+ int depth = 0;
+ wchar_t wc;
+ int clen;
+
+ if (!ctx->nofirstsub)
+ {
+ STACK_PUSH(stack, int, ctx->submatch_id);
+ STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
+ ctx->submatch_id++;
+ }
+ STACK_PUSH(stack, int, PARSE_RE);
+ ctx->re_start = ctx->re;
+
+
+ /* The following is basically just a recursive descent parser. I use
+ an explicit stack instead of recursive functions mostly because of
+ two reasons: compatibility with systems which have an overflowable
+ call stack, and efficiency (both in lines of code and speed). */
+ while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
+ {
+ if (status != REG_OK)
+ break;
+ symbol = tre_stack_pop_int(stack);
+ switch (symbol)
+ {
+ case PARSE_RE:
+ /* Parse a full regexp. A regexp is one or more branches,
+ separated by the union operator `|'. */
+ if (ctx->cflags & REG_EXTENDED)
+ STACK_PUSHX(stack, int, PARSE_UNION);
+ STACK_PUSHX(stack, int, PARSE_BRANCH);
+ break;
+
+ case PARSE_BRANCH:
+ /* Parse a branch. A branch is one or more pieces, concatenated.
+ A piece is an atom possibly followed by a postfix operator. */
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
+ STACK_PUSHX(stack, int, PARSE_PIECE);
+ break;
+
+ case PARSE_PIECE:
+ /* Parse a piece. A piece is an atom possibly followed by one
+ or more postfix operators. */
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
+ STACK_PUSHX(stack, int, PARSE_ATOM);
+ break;
+
+ case PARSE_CATENATION:
+ /* If the expression has not ended, parse another piece. */
+ {
+ tre_char_t c;
+ if (!*ctx->re)
+ break;
+ c = *ctx->re;
+ if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
+ break;
+ if ((ctx->cflags & REG_EXTENDED
+ && c == CHAR_RPAREN && depth > 0)
+ || (!(ctx->cflags & REG_EXTENDED)
+ && (c == CHAR_BACKSLASH
+ && *(ctx->re + 1) == CHAR_RPAREN)))
+ {
+ if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+ status = REG_EPAREN;
+ depth--;
+ if (!(ctx->cflags & REG_EXTENDED))
+ ctx->re += 2;
+ break;
+ }
+
+ {
+ /* Default case, left associative concatenation. */
+ STACK_PUSHX(stack, int, PARSE_CATENATION);
+ STACK_PUSHX(stack, voidptr, result);
+ STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+ STACK_PUSHX(stack, int, PARSE_PIECE);
+ }
+ break;
+ }
+
+ case PARSE_POST_CATENATION:
+ {
+ tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+ tre_ast_node_t *tmp_node;
+ tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
+ if (!tmp_node)
+ return REG_ESPACE;
+ result = tmp_node;
+ break;
+ }
+
+ case PARSE_UNION:
+ switch (*ctx->re)
+ {
+ case CHAR_PIPE:
+ STACK_PUSHX(stack, int, PARSE_UNION);
+ STACK_PUSHX(stack, voidptr, result);
+ STACK_PUSHX(stack, int, PARSE_POST_UNION);
+ STACK_PUSHX(stack, int, PARSE_BRANCH);
+ ctx->re++;
+ break;
+
+ case CHAR_RPAREN:
+ ctx->re++;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case PARSE_POST_UNION:
+ {
+ tre_ast_node_t *tmp_node;
+ tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+ tmp_node = tre_ast_new_union(ctx->mem, tree, result);
+ if (!tmp_node)
+ return REG_ESPACE;
+ result = tmp_node;
+ break;
+ }
+
+ case PARSE_POSTFIX:
+ /* Parse postfix operators. */
+ switch (*ctx->re)
+ {
+ case CHAR_PLUS:
+ case CHAR_QUESTIONMARK:
+ if (!(ctx->cflags & REG_EXTENDED))
+ break;
+ /*FALLTHROUGH*/
+ case CHAR_STAR:
+ {
+ tre_ast_node_t *tmp_node;
+ int minimal = 0;
+ int rep_min = 0;
+ int rep_max = -1;
+
+ if (*ctx->re == CHAR_PLUS)
+ rep_min = 1;
+ if (*ctx->re == CHAR_QUESTIONMARK)
+ rep_max = 1;
+
+ ctx->re++;
+ tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
+ minimal);
+ if (tmp_node == NULL)
+ return REG_ESPACE;
+ result = tmp_node;
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
+ }
+ break;
+
+ case CHAR_BACKSLASH:
+ /* "\{" is special without REG_EXTENDED */
+ if (!(ctx->cflags & REG_EXTENDED)
+ && *(ctx->re + 1) == CHAR_LBRACE)
+ {
+ ctx->re++;
+ goto parse_brace;
+ }
+ else
+ break;
+
+ case CHAR_LBRACE:
+ /* "{" is literal without REG_EXTENDED */
+ if (!(ctx->cflags & REG_EXTENDED))
+ break;
+
+ parse_brace:
+ ctx->re++;
+
+ status = tre_parse_bound(ctx, &result);
+ if (status != REG_OK)
+ return status;
+ STACK_PUSHX(stack, int, PARSE_POSTFIX);
+ break;
+ }
+ break;
+
+ case PARSE_ATOM:
+ /* Parse an atom. An atom is a regular expression enclosed in `()',
+ an empty set of `()', a bracket expression, `.', `^', `$',
+ a `\' followed by a character, or a single character. */
+
+ switch (*ctx->re)
+ {
+ case CHAR_LPAREN: /* parenthesized subexpression */
+
+ if (ctx->cflags & REG_EXTENDED)
+ {
+ lparen:
+ depth++;
+ {
+ ctx->re++;
+ /* First parse a whole RE, then mark the resulting tree
+ for submatching. */
+ STACK_PUSHX(stack, int, ctx->submatch_id);
+ STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+ STACK_PUSHX(stack, int, PARSE_RE);
+ ctx->submatch_id++;
+ }
+ }
+ else
+ goto parse_literal;
+ break;
+
+ case CHAR_LBRACKET: /* bracket expression */
+ ctx->re++;
+ status = tre_parse_bracket(ctx, &result);
+ if (status != REG_OK)
+ return status;
+ break;
+
+ case CHAR_BACKSLASH:
+ /* If this is "\(" or "\)" chew off the backslash and
+ try again. */
+ if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_LPAREN)
+ {
+ ctx->re++;
+ goto lparen;
+ }
+ if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re