diff options
author | David Barksdale <amatus.amongus@gmail.com> | 2013-07-22 08:26:16 -0500 |
---|---|---|
committer | David Barksdale <amatus.amongus@gmail.com> | 2013-07-22 08:26:16 -0500 |
commit | 7450bd0b6c6c05ee6425e2c63e9b79beb94bfbfa (patch) | |
tree | dfde89b41437def7ce23af24db53a11a9b5f1075 /src/regex/regex.c | |
parent | 740b30688bd745a527f96f9116c19acb3480971a (diff) |
Imported Upstream version 0.9.5aupstream
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r-- | src/regex/regex.c | 3373 |
1 files changed, 2357 insertions, 1016 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 5244c26..ad8e56b 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c @@ -19,417 +19,80 @@ */ /** * @file src/regex/regex.c - * @brief library to create automatons from regular expressions + * @brief library to create Deterministic Finite Automatons (DFAs) from regular + * expressions (regexes). * @author Maximilian Szengel */ #include "platform.h" #include "gnunet_container_lib.h" #include "gnunet_crypto_lib.h" #include "gnunet_regex_lib.h" -#include "regex.h" - -#define initial_bits 10 - -/** - * Context that contains an id counter for states and transitions as well as a - * DLL of automatons used as a stack for NFA construction. - */ -struct GNUNET_REGEX_Context -{ - /** - * Unique state id. - */ - unsigned int state_id; - - /** - * Unique transition id. - */ - unsigned int transition_id; - - /** - * Unique SCC (Strongly Connected Component) id. - */ - unsigned int scc_id; - - /** - * DLL of GNUNET_REGEX_Automaton's used as a stack. - */ - struct GNUNET_REGEX_Automaton *stack_head; - - /** - * DLL of GNUNET_REGEX_Automaton's used as a stack. - */ - struct GNUNET_REGEX_Automaton *stack_tail; -}; +#include "regex_internal.h" /** - * Type of an automaton. + * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA + * creation. Disabled by default for better performance. */ -enum GNUNET_REGEX_automaton_type -{ - NFA, - DFA -}; +#define REGEX_DEBUG_DFA GNUNET_NO /** - * Automaton representation. + * Set of states using MDLL API. */ -struct GNUNET_REGEX_Automaton +struct GNUNET_REGEX_StateSet_MDLL { /** - * This is a linked list. - */ - struct GNUNET_REGEX_Automaton *prev; - - /** - * This is a linked list. - */ - struct GNUNET_REGEX_Automaton *next; - - /** - * First state of the automaton. This is mainly used for constructing an NFA, - * where each NFA itself consists of one or more NFAs linked together. - */ - struct GNUNET_REGEX_State *start; - - /** - * End state of the automaton. - */ - struct GNUNET_REGEX_State *end; - - /** - * Number of states in the automaton. + * MDLL of states. */ - unsigned int state_count; + struct GNUNET_REGEX_State *head; /** - * DLL of states. + * MDLL of states. */ - struct GNUNET_REGEX_State *states_head; + struct GNUNET_REGEX_State *tail; /** - * DLL of states - */ - struct GNUNET_REGEX_State *states_tail; - - /** - * Type of the automaton. - */ - enum GNUNET_REGEX_automaton_type type; -}; - -/** - * A state. Can be used in DFA and NFA automatons. - */ -struct GNUNET_REGEX_State -{ - /** - * This is a linked list. - */ - struct GNUNET_REGEX_State *prev; - - /** - * This is a linked list. - */ - struct GNUNET_REGEX_State *next; - - /** - * Unique state id. - */ - unsigned int id; - - /** - * If this is an accepting state or not. - */ - int accepting; - - /** - * Marking of the state. This is used for marking all visited states when - * traversing all states of an automaton and for cases where the state id - * cannot be used (dfa minimization). - */ - int marked; - - /** - * Marking the state as contained. This is used for checking, if the state is - * contained in a set in constant time - */ - int contained; - - /** - * Marking the state as part of an SCC (Strongly Connected Component). All - * states with the same scc_id are part of the same SCC. scc_id is 0, if state - * is not a part of any SCC. - */ - unsigned int scc_id; - - /** - * Used for SCC detection. - */ - int index; - - /** - * Used for SCC detection. - */ - int lowlink; - - /** - * Human readable name of the automaton. Used for debugging and graph - * creation. - */ - char *name; - - /** - * Hash of the state. - */ - GNUNET_HashCode hash; - - /** - * Proof for this state. - */ - char *proof; - - /** - * Number of transitions from this state to other states. - */ - unsigned int transition_count; - - /** - * DLL of transitions. - */ - struct Transition *transitions_head; - - /** - * DLL of transitions. - */ - struct Transition *transitions_tail; - - /** - * Set of states on which this state is based on. Used when creating a DFA out - * of several NFA states. - */ - struct GNUNET_REGEX_StateSet *nfa_set; -}; - -/** - * Transition between two states. Each state can have 0-n transitions. If label - * is 0, this is considered to be an epsilon transition. - */ -struct Transition -{ - /** - * This is a linked list. - */ - struct Transition *prev; - - /** - * This is a linked list. - */ - struct Transition *next; - - /** - * Unique id of this transition. - */ - unsigned int id; - - /** - * Label for this transition. This is basically the edge label for the graph. - */ - char label; - - /** - * State to which this transition leads. - */ - struct GNUNET_REGEX_State *to_state; - - /** - * State from which this transition origins. - */ - struct GNUNET_REGEX_State *from_state; - - /** - * Mark this transition. For example when reversing the automaton. - */ - int mark; -}; - -/** - * Set of states. - */ -struct GNUNET_REGEX_StateSet -{ - /** - * Array of states. - */ - struct GNUNET_REGEX_State **states; - - /** - * Length of the 'states' array. + * Length of the MDLL. */ unsigned int len; }; -/* - * Debug helper functions - */ -void -debug_print_transitions (struct GNUNET_REGEX_State *); - -void -debug_print_state (struct GNUNET_REGEX_State *s) -{ - char *proof; - - if (NULL == s->proof) - proof = "NULL"; - else - proof = s->proof; - - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, - "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i proof: %s\n", - s->id, s->name, s->marked, s->accepting, s->scc_id, - s->transition_count, proof); - - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transitions:\n"); - debug_print_transitions (s); -} - -void -debug_print_states (struct GNUNET_REGEX_Automaton *a) -{ - struct GNUNET_REGEX_State *s; - - for (s = a->states_head; NULL != s; s = s->next) - debug_print_state (s); -} - -void -debug_print_transition (struct Transition *t) -{ - char *to_state; - char *from_state; - char label; - - if (NULL == t) - return; - - if (0 == t->label) - label = '0'; - else - label = t->label; - - if (NULL == t->to_state) - to_state = "NULL"; - else - to_state = t->to_state->name; - - if (NULL == t->from_state) - from_state = "NULL"; - else - from_state = t->from_state->name; - - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: From %s on %c to %s\n", - t->id, from_state, label, to_state); -} - -void -debug_print_transitions (struct GNUNET_REGEX_State *s) -{ - struct Transition *t; - - for (t = s->transitions_head; NULL != t; t = t->next) - debug_print_transition (t); -} /** - * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside - * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all - * SCCs inside an automaton. + * Append state to the given StateSet ' * - * @param ctx context - * @param v start vertex - * @param index current index - * @param stack stack for saving all SCCs - * @param stack_size current size of the stack + * @param set set to be modified + * @param state state to be appended */ static void -scc_tarjan_strongconnect (struct GNUNET_REGEX_Context *ctx, - struct GNUNET_REGEX_State *v, int *index, - struct GNUNET_REGEX_State **stack, - unsigned int *stack_size) +state_set_append (struct GNUNET_REGEX_StateSet *set, + struct GNUNET_REGEX_State *state) { - struct GNUNET_REGEX_State *w; - struct Transition *t; - - v->index = *index; - v->lowlink = *index; - (*index)++; - stack[(*stack_size)++] = v; - v->contained = 1; - - for (t = v->transitions_head; NULL != t; t = t->next) - { - w = t->to_state; - if (NULL != w && w->index < 0) - { - scc_tarjan_strongconnect (ctx, w, index, stack, stack_size); - v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink; - } - else if (0 != w->contained) - v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink; - } - - if (v->lowlink == v->index) - { - w = stack[--(*stack_size)]; - w->contained = 0; - - if (v != w) - { - ctx->scc_id++; - while (v != w) - { - w->scc_id = ctx->scc_id; - w = stack[--(*stack_size)]; - w->contained = 0; - } - w->scc_id = ctx->scc_id; - } - } + if (set->off == set->size) + GNUNET_array_grow (set->states, set->size, set->size * 2 + 4); + set->states[set->off++] = state; } + /** - * Detect all SCCs (Strongly Connected Components) inside the given automaton. - * SCCs will be marked using the scc_id on each state. + * Compare two strings for equality. If either is NULL they are not equal. * - * @param ctx context - * @param a automaton + * @param str1 first string for comparison. + * @param str2 second string for comparison. + * + * @return 0 if the strings are the same or both NULL, 1 or -1 if not. */ -static void -scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a) +static int +nullstrcmp (const char *str1, const char *str2) { - int index; - struct GNUNET_REGEX_State *v; - struct GNUNET_REGEX_State *stack[a->state_count]; - unsigned int stack_size; - - for (v = a->states_head; NULL != v; v = v->next) - { - v->contained = 0; - v->index = -1; - v->lowlink = -1; - } - - stack_size = 0; - index = 0; + if ((NULL == str1) != (NULL == str2)) + return -1; + if ((NULL == str1) && (NULL == str2)) + return 0; - for (v = a->states_head; NULL != v; v = v->next) - { - if (v->index < 0) - scc_tarjan_strongconnect (ctx, v, &index, stack, &stack_size); - } + return strcmp (str1, str2); } + /** * Adds a transition from one state to another on 'label'. Does not add * duplicate states. @@ -441,11 +104,11 @@ scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a) */ static void state_add_transition (struct GNUNET_REGEX_Context *ctx, - struct GNUNET_REGEX_State *from_state, const char label, + struct GNUNET_REGEX_State *from_state, const char *label, struct GNUNET_REGEX_State *to_state) { - int is_dup; - struct Transition *t; + struct GNUNET_REGEX_Transition *t; + struct GNUNET_REGEX_Transition *oth; if (NULL == from_state) { @@ -453,33 +116,64 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, return; } - // Do not add duplicate state transitions - is_dup = GNUNET_NO; + /* Do not add duplicate state transitions */ for (t = from_state->transitions_head; NULL != t; t = t->next) { - if (t->to_state == to_state && t->label == label && + if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) && t->from_state == from_state) - { - is_dup = GNUNET_YES; - break; - } + return; } - if (is_dup) - return; + /* sort transitions by label */ + for (oth = from_state->transitions_head; NULL != oth; oth = oth->next) + { + if (0 < nullstrcmp (oth->label, label)) + break; + } - t = GNUNET_malloc (sizeof (struct Transition)); - t->id = ctx->transition_id++; - t->label = label; + t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); + if (NULL != ctx) + t->id = ctx->transition_id++; + if (NULL != label) + t->label = GNUNET_strdup (label); + else + t->label = NULL; t->to_state = to_state; t->from_state = from_state; - // Add outgoing transition to 'from_state' + /* Add outgoing transition to 'from_state' */ from_state->transition_count++; - GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, - from_state->transitions_tail, t); + GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head, + from_state->transitions_tail, oth, t); } + +/** + * Remove a 'transition' from 'state'. + * + * @param state state from which the to-be-removed transition originates. + * @param transition transition that should be removed from state 'state'. + */ +static void +state_remove_transition (struct GNUNET_REGEX_State *state, + struct GNUNET_REGEX_Transition *transition) +{ + if (NULL == state || NULL == transition) + return; + + if (transition->from_state != state) + return; + + GNUNET_free_non_null (transition->label); + + state->transition_count--; + GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail, + transition); + + GNUNET_free (transition); +} + + /** * Compare two states. Used for sorting. * @@ -493,27 +187,26 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, static int state_compare (const void *a, const void *b) { - struct GNUNET_REGEX_State **s1; - struct GNUNET_REGEX_State **s2; - - s1 = (struct GNUNET_REGEX_State **) a; - s2 = (struct GNUNET_REGEX_State **) b; + struct GNUNET_REGEX_State **s1 = (struct GNUNET_REGEX_State **) a; + struct GNUNET_REGEX_State **s2 = (struct GNUNET_REGEX_State **) b; return (*s1)->id - (*s2)->id; } + /** * Get all edges leaving state 's'. * * @param s state. - * @param edges all edges leaving 's'. + * @param edges all edges leaving 's', expected to be allocated and have enough + * space for s->transitions_count elements. * * @return number of edges. */ static unsigned int state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) { - struct Transition *t; + struct GNUNET_REGEX_Transition *t; unsigned int count; if (NULL == s) @@ -525,7 +218,7 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) { if (NULL != t->to_state) { - edges[count].label = &t->label; + edges[count].label = t->label; edges[count].destination = t->to_state->hash; count++; } @@ -533,39 +226,37 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) return count; } + /** * Compare to state sets by comparing the id's of the states that are contained * in each set. Both sets are expected to be sorted by id! * * @param sset1 first state set * @param sset2 second state set - * - * @return an integer less than, equal to, or greater than zero - * if the first argument is considered to be respectively - * less than, equal to, or greater than the second. + * @return 0 if the sets are equal, otherwise non-zero */ static int state_set_compare (struct GNUNET_REGEX_StateSet *sset1, struct GNUNET_REGEX_StateSet *sset2) { int result; - int i; + unsigned int i; if (NULL == sset1 || NULL == sset2) return 1; - result = sset1->len - sset2->len; - - for (i = 0; i < sset1->len; i++) - { - if (0 != result) + result = sset1->off - sset2->off; + if (result < 0) + return -1; + if (result > 0) + return 1; + for (i = 0; i < sset1->off; i++) + if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i]))) break; - - result = state_compare (&sset1->states[i], &sset2->states[i]); - } return result; } + /** * Clears the given StateSet 'set' * @@ -574,14 +265,11 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, static void state_set_clear (struct GNUNET_REGEX_StateSet *set) { - if (NULL != set) - { - if (NULL != set->states) - GNUNET_free (set->states); - GNUNET_free (set); - } + GNUNET_array_grow (set->states, set->size, 0); + set->off = 0; } + /** * Clears an automaton fragment. Does not destroy the states inside the * automaton. @@ -602,6 +290,7 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) GNUNET_free (a); } + /** * Frees the memory used by State 's' * @@ -610,30 +299,25 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) static void automaton_destroy_state (struct GNUNET_REGEX_State *s) { - struct Transition *t; - struct Transition *next_t; + struct GNUNET_REGEX_Transition *t; + struct GNUNET_REGEX_Transition *next_t; if (NULL == s) return; - if (NULL != s->name) - GNUNET_free (s->name); - - if (NULL != s->proof) - GNUNET_free (s->proof); - + GNUNET_free_non_null (s->name); + GNUNET_free_non_null (s->proof); + state_set_clear (&s->nfa_set); for (t = s->transitions_head; NULL != t; t = next_t) { next_t = t->next; - GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); - GNUNET_free (t); + state_remove_transition (s, t); } - state_set_clear (s->nfa_set); - GNUNET_free (s); } + /** * Remove a state from the given automaton 'a'. Always use this function when * altering the states of an automaton. Will also remove all transitions leading @@ -646,39 +330,36 @@ static void automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct GNUNET_REGEX_State *s) { - struct GNUNET_REGEX_State *ss; struct GNUNET_REGEX_State *s_check; - struct Transition *t_check; + struct GNUNET_REGEX_Transition *t_check; + struct GNUNET_REGEX_Transition *t_check_next; if (NULL == a || NULL == s) return; - // remove state - ss = s; - GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); - a->state_count--; - - // remove all transitions leading to this state + /* remove all transitions leading to this state */ for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) { for (t_check = s_check->transitions_head; NULL != t_check; - t_check = t_check->next) + t_check = t_check_next) { - if (t_check->to_state == ss) - { - GNUNET_CONTAINER_DLL_remove (s_check->transitions_head, - s_check->transitions_tail, t_check); - s_check->transition_count--; - } + t_check_next = t_check->next; + if (t_check->to_state == s) + state_remove_transition (s_check, t_check); } } - automaton_destroy_state (ss); + /* remove state */ + GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); + a->state_count--; + + automaton_destroy_state (s); } + /** * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy - * 's2'. + * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'. * * @param ctx context * @param a automaton @@ -692,48 +373,61 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_State *s2) { struct GNUNET_REGEX_State *s_check; - struct Transition *t_check; - char *new_name; - - GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); + struct GNUNET_REGEX_Transition *t_check; + struct GNUNET_REGEX_Transition *t; + struct GNUNET_REGEX_Transition *t_next; + int is_dup; if (s1 == s2) return; - // 1. Make all transitions pointing to s2 point to s1 + /* 1. Make all transitions pointing to s2 point to s1, unless this transition + * does not already exists, if it already exists remove transition. */ for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) { - for (t_check = s_check->transitions_head; NULL != t_check; - t_check = t_check->next) + for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next) { + t_next = t_check->next; + if (s2 == t_check->to_state) - t_check->to_state = s1; + { + is_dup = GNUNET_NO; + for (t = t_check->from_state->transitions_head; NULL != t; t = t->next) + { + if (t->to_state == s1 && 0 == strcmp (t_check->label, t->label)) + is_dup = GNUNET_YES; + } + if (GNUNET_NO == is_dup) + t_check->to_state = s1; + else + state_remove_transition (t_check->from_state, t_check); + } } } - // 2. Add all transitions from s2 to sX to s1 + /* 2. Add all transitions from s2 to sX to s1 */ for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) { if (t_check->to_state != s1) state_add_transition (ctx, s1, t_check->label, t_check->to_state); } - // 3. Rename s1 to {s1,s2} - new_name = GNUNET_strdup (s1->name); - if (NULL != s1->name) - { - GNUNET_free (s1->name); - s1->name = NULL; - } + /* 3. Rename s1 to {s1,s2} */ +#if REGEX_DEBUG_DFA + char *new_name; + + new_name = s1->name; GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name); GNUNET_free (new_name); +#endif - // remove state + /* remove state */ GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2); a->state_count--; automaton_destroy_state (s2); } + /** * Add a state to the automaton 'a', always use this function to alter the * states DLL of the automaton. @@ -749,154 +443,1316 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a, a->state_count++; } -/** - * Function that is called with each state, when traversing an automaton. - * - * @param cls closure - * @param s state - */ -typedef void (*GNUNET_REGEX_traverse_action) (void *cls, - struct GNUNET_REGEX_State * s); /** - * Traverses all states that are reachable from state 's'. Expects the states to - * be unmarked (s->marked == GNUNET_NO). Performs 'action' on each visited - * state. + * Depth-first traversal (DFS) of all states that are reachable from state + * 's'. Performs 'action' on each visited state. * - * @param cls closure. * @param s start state. + * @param marks an array of size a->state_count to remember which state was + * already visited. + * @param count current count of the state. + * @param check function that is checked before advancing on each transition + * in the DFS. + * @param check_cls closure for check. * @param action action to be performed on each state. + * @param action_cls closure for action. */ static void -automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s, - GNUNET_REGEX_traverse_action action) +automaton_state_traverse (struct GNUNET_REGEX_State *s, int *marks, + unsigned int *count, + GNUNET_REGEX_traverse_check check, void *check_cls, + GNUNET_REGEX_traverse_action action, void *action_cls) { - struct Transition *t; + struct GNUNET_REGEX_Transition *t; + + if (GNUNET_YES == marks[s->traversal_id]) + return; - if (GNUNET_NO == s->marked) + marks[s->traversal_id] = GNUNET_YES; + + if (NULL != action) + action (action_cls, *count, s); + + (*count)++; + + for (t = s->transitions_head; NULL != t; t = t->next) { - s->marked = GNUNET_YES; + if (NULL == check || + (NULL != check && GNUNET_YES == check (check_cls, s, t))) + { + automaton_state_traverse (t->to_state, marks, count, check, check_cls, + action, action_cls); + } + } +} - if (action > 0) - action (cls, s); - for (t = s->transitions_head; NULL != t; t = t->next) - automaton_state_traverse (cls, t->to_state, action); +/** + * Traverses the given automaton using depth-first-search (DFS) from it's start + * state, visiting all reachable states and calling 'action' on each one of + * them. + * + * @param a automaton to be traversed. + * @param start start state, pass a->start or NULL to traverse the whole automaton. + * @param check function that is checked before advancing on each transition + * in the DFS. + * @param check_cls closure for check. + * @param action action to be performed on each state. + * @param action_cls closure for action + */ +void +GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a, + struct GNUNET_REGEX_State *start, + GNUNET_REGEX_traverse_check check, + void *check_cls, + GNUNET_REGEX_traverse_action action, + void *action_cls) +{ + unsigned int count; + struct GNUNET_REGEX_State *s; + + if (NULL == a || 0 == a->state_count) + return; + + int marks[a->state_count]; + + for (count = 0, s = a->states_head; NULL != s && count < a->state_count; + s = s->next, count++) + { + s->traversal_id = count; + marks[s->traversal_id] = GNUNET_NO; } + + count = 0; + + if (NULL == start) + s = a->start; + else + s = start; + + automaton_state_traverse (s, marks, &count, check, check_cls, action, + action_cls); } + /** - * Traverses the given automaton from it's start state, visiting all reachable - * states and calling 'action' on each one of them. + * String container for faster string operations. + */ +struct StringBuffer +{ + /** + * Buffer holding the string (may start in the middle!); + * NOT 0-terminated! + */ + char *sbuf; + + /** + * Allocated buffer. + */ + char *abuf; + + /** + * Length of the string in the buffer. + */ + size_t slen; + + /** + * Number of bytes allocated for 'sbuf' + */ + unsigned int blen; + + /** + * Buffer currently represents "NULL" (not the empty string!) + */ + int16_t null_flag; + + /** + * If this entry is part of the last/current generation array, + * this flag is GNUNET_YES if the last and current generation are + * identical (and thus copying is unnecessary if the value didn't + * change). This is used in an optimization that improves + * performance by about 1% --- if we use int16_t here. With just + * "int" for both flags, performance drops (on my system) significantly, + * most likely due to increased cache misses. + */ + int16_t synced; + +}; + + +/** + * Compare two strings for equality. If either is NULL they are not equal. * - * @param cls closure. - * @param a automaton. - * @param action action to be performed on each state. + * @param s1 first string for comparison. + * @param s2 second string for comparison. + * + * @return 0 if the strings are the same or both NULL, 1 or -1 if not. + */ +static int +sb_nullstrcmp (const struct StringBuffer *s1, + const struct StringBuffer *s2) +{ + if ( (GNUNET_YES == s1->null_flag) && + (GNUNET_YES == s2->null_flag) ) + return 0; + if ( (GNUNET_YES == s1->null_flag) || + (GNUNET_YES == s2->null_flag) ) + return -1; + if (s1->slen != s2->slen) + return -1; + return memcmp (s1->sbuf, s2->sbuf, s1->slen); +} + + +/** + * Compare two strings for equality. + * + * @param s1 first string for comparison. + * @param s2 second string for comparison. + * + * @return 0 if the strings are the same, 1 or -1 if not. + */ +static int +sb_strcmp (const struct StringBuffer *s1, + const struct StringBuffer *s2) +{ + if (s1->slen != s2->slen) + return -1; + return memcmp (s1->sbuf, s2->sbuf, s1->slen); +} + + +/** + * Reallocate the buffer of 'ret' to fit 'nlen' characters; + * move the existing string to the beginning of the new buffer. + * + * @param ret current buffer, to be updated + * @param nlen target length for the buffer, must be at least ret->slen */ static void -automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, - GNUNET_REGEX_traverse_action action) +sb_realloc (struct StringBuffer *ret, + size_t nlen) { - struct GNUNET_REGEX_State *s; + char *old; + + GNUNET_assert (nlen >= ret->slen); + old = ret->abuf; + ret->abuf = GNUNET_malloc (nlen); + ret->blen = nlen; + memcpy (ret->abuf, + ret->sbuf, + ret->slen); + ret->sbuf = ret->abuf; + GNUNET_free_non_null (old); +} + - for (s = a->states_head; NULL != s; s = s->next) - s->marked = GNUNET_NO; +/** + * Append a string. + * + * @param ret where to write the result + * @param sarg string to append + */ +static void +sb_append (struct StringBuffer *ret, + const struct StringBuffer *sarg) +{ + if (GNUNET_YES == ret->null_flag) + ret->slen = 0; + ret->null_flag = GNUNET_NO; + if (ret->blen < sarg->slen + ret->slen) + sb_realloc (ret, ret->blen + sarg->slen + 128); + memcpy (&ret->sbuf[ret->slen], + sarg->sbuf, + sarg->slen); + ret->slen += sarg->slen; +} + - automaton_state_traverse (cls, a->start, action); +/** + * Append a C string. + * + * @param ret where to write the result + * @param cstr string to append + */ +static void +sb_append_cstr (struct StringBuffer *ret, + const char *cstr) +{ + size_t cstr_len = strlen (cstr); + + if (GNUNET_YES == ret->null_flag) + ret->slen = 0; + ret->null_flag = GNUNET_NO; + if (ret->blen < cstr_len + ret->slen) + sb_realloc (ret, ret->blen + cstr_len + 128); + memcpy (&ret->sbuf[ret->slen], + cstr, + cstr_len); + ret->slen += cstr_len; +} + + +/** + * Wrap a string buffer, that is, set ret to the format string + * which contains an "%s" which is to be replaced with the original + * content of 'ret'. Note that optimizing this function is not + * really worth it, it is rarely called. + * + * @param ret where to write the result and take the input for %.*s from + * @param format format string, fprintf-style, with exactly one "%.*s" + * @param extra_chars how long will the result be, in addition to 'sarg' length + */ +static void +sb_wrap (struct StringBuffer *ret, + const char *format, + size_t extra_chars) +{ + char *temp; + + if (GNUNET_YES == ret->null_flag) + ret->slen = 0; + ret->null_flag = GNUNET_NO; + temp = GNUNET_malloc (ret->slen + extra_chars + 1); + GNUNET_snprintf (temp, + ret->slen + extra_chars + 1, + format, + (int) ret->slen, + ret->sbuf); + GNUNET_free_non_null (ret->abuf); + ret->abuf = temp; + ret->sbuf = temp; + ret->blen = ret->slen + extra_chars + 1; + ret->slen = ret->slen + extra_chars; } + /** - * Reverses all transitions of the given automaton. + * Format a string buffer. Note that optimizing this function is not + * really worth it, it is rarely called. * - * @param a automaton. + * @param ret where to write the result + * @param format format string, fprintf-style, with exactly one "%.*s" + * @param extra_chars how long will the result be, in addition to 'sarg' length + * @param sarg string to print into the format */ static void -automaton_reverse (struct GNUNET_REGEX_Automaton *a) +sb_printf1 (struct StringBuffer *ret, + const char *format, + size_t extra_chars, + const struct StringBuffer *sarg) { - struct GNUNET_REGEX_State *s; - struct Transition *t; - struct Transition *t_next; - struct GNUNET_REGEX_State *s_swp; + if (ret->blen < sarg->slen + extra_chars + 1) + sb_realloc (ret, + sarg->slen + extra_chars + 1); + ret->null_flag = GNUNET_NO; + ret->sbuf = ret->abuf; + ret->slen = sarg->slen + extra_chars; + GNUNET_snprintf (ret->sbuf, + ret->blen, + format, + (int) sarg->slen, + sarg->sbuf); +} - for (s = a->states_head; NULL != s; s = s->next) - for (t = s->transitions_head; NULL != t; t = t->next) - t->mark = GNUNET_NO; - for (s = a->states_head; NULL != s; s = s->next) +/** + * Format a string buffer. + * + * @param ret where to write the result + * @param format format string, fprintf-style, with exactly two "%.*s" + * @param extra_chars how long will the result be, in addition to 'sarg1/2' length + * @param sarg1 first strin |