aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r--src/regex/regex.c3373
1 files changed, 2357 insertions, 1016 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 5244c26..ad8e56b 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -19,417 +19,80 @@
*/
/**
* @file src/regex/regex.c
- * @brief library to create automatons from regular expressions
+ * @brief library to create Deterministic Finite Automatons (DFAs) from regular
+ * expressions (regexes).
* @author Maximilian Szengel
*/
#include "platform.h"
#include "gnunet_container_lib.h"
#include "gnunet_crypto_lib.h"
#include "gnunet_regex_lib.h"
-#include "regex.h"
-
-#define initial_bits 10
-
-/**
- * Context that contains an id counter for states and transitions as well as a
- * DLL of automatons used as a stack for NFA construction.
- */
-struct GNUNET_REGEX_Context
-{
- /**
- * Unique state id.
- */
- unsigned int state_id;
-
- /**
- * Unique transition id.
- */
- unsigned int transition_id;
-
- /**
- * Unique SCC (Strongly Connected Component) id.
- */
- unsigned int scc_id;
-
- /**
- * DLL of GNUNET_REGEX_Automaton's used as a stack.
- */
- struct GNUNET_REGEX_Automaton *stack_head;
-
- /**
- * DLL of GNUNET_REGEX_Automaton's used as a stack.
- */
- struct GNUNET_REGEX_Automaton *stack_tail;
-};
+#include "regex_internal.h"
/**
- * Type of an automaton.
+ * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA
+ * creation. Disabled by default for better performance.
*/
-enum GNUNET_REGEX_automaton_type
-{
- NFA,
- DFA
-};
+#define REGEX_DEBUG_DFA GNUNET_NO
/**
- * Automaton representation.
+ * Set of states using MDLL API.
*/
-struct GNUNET_REGEX_Automaton
+struct GNUNET_REGEX_StateSet_MDLL
{
/**
- * This is a linked list.
- */
- struct GNUNET_REGEX_Automaton *prev;
-
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_Automaton *next;
-
- /**
- * First state of the automaton. This is mainly used for constructing an NFA,
- * where each NFA itself consists of one or more NFAs linked together.
- */
- struct GNUNET_REGEX_State *start;
-
- /**
- * End state of the automaton.
- */
- struct GNUNET_REGEX_State *end;
-
- /**
- * Number of states in the automaton.
+ * MDLL of states.
*/
- unsigned int state_count;
+ struct GNUNET_REGEX_State *head;
/**
- * DLL of states.
+ * MDLL of states.
*/
- struct GNUNET_REGEX_State *states_head;
+ struct GNUNET_REGEX_State *tail;
/**
- * DLL of states
- */
- struct GNUNET_REGEX_State *states_tail;
-
- /**
- * Type of the automaton.
- */
- enum GNUNET_REGEX_automaton_type type;
-};
-
-/**
- * A state. Can be used in DFA and NFA automatons.
- */
-struct GNUNET_REGEX_State
-{
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_State *prev;
-
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_State *next;
-
- /**
- * Unique state id.
- */
- unsigned int id;
-
- /**
- * If this is an accepting state or not.
- */
- int accepting;
-
- /**
- * Marking of the state. This is used for marking all visited states when
- * traversing all states of an automaton and for cases where the state id
- * cannot be used (dfa minimization).
- */
- int marked;
-
- /**
- * Marking the state as contained. This is used for checking, if the state is
- * contained in a set in constant time
- */
- int contained;
-
- /**
- * Marking the state as part of an SCC (Strongly Connected Component). All
- * states with the same scc_id are part of the same SCC. scc_id is 0, if state
- * is not a part of any SCC.
- */
- unsigned int scc_id;
-
- /**
- * Used for SCC detection.
- */
- int index;
-
- /**
- * Used for SCC detection.
- */
- int lowlink;
-
- /**
- * Human readable name of the automaton. Used for debugging and graph
- * creation.
- */
- char *name;
-
- /**
- * Hash of the state.
- */
- GNUNET_HashCode hash;
-
- /**
- * Proof for this state.
- */
- char *proof;
-
- /**
- * Number of transitions from this state to other states.
- */
- unsigned int transition_count;
-
- /**
- * DLL of transitions.
- */
- struct Transition *transitions_head;
-
- /**
- * DLL of transitions.
- */
- struct Transition *transitions_tail;
-
- /**
- * Set of states on which this state is based on. Used when creating a DFA out
- * of several NFA states.
- */
- struct GNUNET_REGEX_StateSet *nfa_set;
-};
-
-/**
- * Transition between two states. Each state can have 0-n transitions. If label
- * is 0, this is considered to be an epsilon transition.
- */
-struct Transition
-{
- /**
- * This is a linked list.
- */
- struct Transition *prev;
-
- /**
- * This is a linked list.
- */
- struct Transition *next;
-
- /**
- * Unique id of this transition.
- */
- unsigned int id;
-
- /**
- * Label for this transition. This is basically the edge label for the graph.
- */
- char label;
-
- /**
- * State to which this transition leads.
- */
- struct GNUNET_REGEX_State *to_state;
-
- /**
- * State from which this transition origins.
- */
- struct GNUNET_REGEX_State *from_state;
-
- /**
- * Mark this transition. For example when reversing the automaton.
- */
- int mark;
-};
-
-/**
- * Set of states.
- */
-struct GNUNET_REGEX_StateSet
-{
- /**
- * Array of states.
- */
- struct GNUNET_REGEX_State **states;
-
- /**
- * Length of the 'states' array.
+ * Length of the MDLL.
*/
unsigned int len;
};
-/*
- * Debug helper functions
- */
-void
-debug_print_transitions (struct GNUNET_REGEX_State *);
-
-void
-debug_print_state (struct GNUNET_REGEX_State *s)
-{
- char *proof;
-
- if (NULL == s->proof)
- proof = "NULL";
- else
- proof = s->proof;
-
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
- "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i proof: %s\n",
- s->id, s->name, s->marked, s->accepting, s->scc_id,
- s->transition_count, proof);
-
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transitions:\n");
- debug_print_transitions (s);
-}
-
-void
-debug_print_states (struct GNUNET_REGEX_Automaton *a)
-{
- struct GNUNET_REGEX_State *s;
-
- for (s = a->states_head; NULL != s; s = s->next)
- debug_print_state (s);
-}
-
-void
-debug_print_transition (struct Transition *t)
-{
- char *to_state;
- char *from_state;
- char label;
-
- if (NULL == t)
- return;
-
- if (0 == t->label)
- label = '0';
- else
- label = t->label;
-
- if (NULL == t->to_state)
- to_state = "NULL";
- else
- to_state = t->to_state->name;
-
- if (NULL == t->from_state)
- from_state = "NULL";
- else
- from_state = t->from_state->name;
-
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: From %s on %c to %s\n",
- t->id, from_state, label, to_state);
-}
-
-void
-debug_print_transitions (struct GNUNET_REGEX_State *s)
-{
- struct Transition *t;
-
- for (t = s->transitions_head; NULL != t; t = t->next)
- debug_print_transition (t);
-}
/**
- * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside
- * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all
- * SCCs inside an automaton.
+ * Append state to the given StateSet '
*
- * @param ctx context
- * @param v start vertex
- * @param index current index
- * @param stack stack for saving all SCCs
- * @param stack_size current size of the stack
+ * @param set set to be modified
+ * @param state state to be appended
*/
static void
-scc_tarjan_strongconnect (struct GNUNET_REGEX_Context *ctx,
- struct GNUNET_REGEX_State *v, int *index,
- struct GNUNET_REGEX_State **stack,
- unsigned int *stack_size)
+state_set_append (struct GNUNET_REGEX_StateSet *set,
+ struct GNUNET_REGEX_State *state)
{
- struct GNUNET_REGEX_State *w;
- struct Transition *t;
-
- v->index = *index;
- v->lowlink = *index;
- (*index)++;
- stack[(*stack_size)++] = v;
- v->contained = 1;
-
- for (t = v->transitions_head; NULL != t; t = t->next)
- {
- w = t->to_state;
- if (NULL != w && w->index < 0)
- {
- scc_tarjan_strongconnect (ctx, w, index, stack, stack_size);
- v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink;
- }
- else if (0 != w->contained)
- v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink;
- }
-
- if (v->lowlink == v->index)
- {
- w = stack[--(*stack_size)];
- w->contained = 0;
-
- if (v != w)
- {
- ctx->scc_id++;
- while (v != w)
- {
- w->scc_id = ctx->scc_id;
- w = stack[--(*stack_size)];
- w->contained = 0;
- }
- w->scc_id = ctx->scc_id;
- }
- }
+ if (set->off == set->size)
+ GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
+ set->states[set->off++] = state;
}
+
/**
- * Detect all SCCs (Strongly Connected Components) inside the given automaton.
- * SCCs will be marked using the scc_id on each state.
+ * Compare two strings for equality. If either is NULL they are not equal.
*
- * @param ctx context
- * @param a automaton
+ * @param str1 first string for comparison.
+ * @param str2 second string for comparison.
+ *
+ * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
*/
-static void
-scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a)
+static int
+nullstrcmp (const char *str1, const char *str2)
{
- int index;
- struct GNUNET_REGEX_State *v;
- struct GNUNET_REGEX_State *stack[a->state_count];
- unsigned int stack_size;
-
- for (v = a->states_head; NULL != v; v = v->next)
- {
- v->contained = 0;
- v->index = -1;
- v->lowlink = -1;
- }
-
- stack_size = 0;
- index = 0;
+ if ((NULL == str1) != (NULL == str2))
+ return -1;
+ if ((NULL == str1) && (NULL == str2))
+ return 0;
- for (v = a->states_head; NULL != v; v = v->next)
- {
- if (v->index < 0)
- scc_tarjan_strongconnect (ctx, v, &index, stack, &stack_size);
- }
+ return strcmp (str1, str2);
}
+
/**
* Adds a transition from one state to another on 'label'. Does not add
* duplicate states.
@@ -441,11 +104,11 @@ scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a)
*/
static void
state_add_transition (struct GNUNET_REGEX_Context *ctx,
- struct GNUNET_REGEX_State *from_state, const char label,
+ struct GNUNET_REGEX_State *from_state, const char *label,
struct GNUNET_REGEX_State *to_state)
{
- int is_dup;
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *oth;
if (NULL == from_state)
{
@@ -453,33 +116,64 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
return;
}
- // Do not add duplicate state transitions
- is_dup = GNUNET_NO;
+ /* Do not add duplicate state transitions */
for (t = from_state->transitions_head; NULL != t; t = t->next)
{
- if (t->to_state == to_state && t->label == label &&
+ if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) &&
t->from_state == from_state)
- {
- is_dup = GNUNET_YES;
- break;
- }
+ return;
}
- if (is_dup)
- return;
+ /* sort transitions by label */
+ for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
+ {
+ if (0 < nullstrcmp (oth->label, label))
+ break;
+ }
- t = GNUNET_malloc (sizeof (struct Transition));
- t->id = ctx->transition_id++;
- t->label = label;
+ t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
+ if (NULL != ctx)
+ t->id = ctx->transition_id++;
+ if (NULL != label)
+ t->label = GNUNET_strdup (label);
+ else
+ t->label = NULL;
t->to_state = to_state;
t->from_state = from_state;
- // Add outgoing transition to 'from_state'
+ /* Add outgoing transition to 'from_state' */
from_state->transition_count++;
- GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
- from_state->transitions_tail, t);
+ GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
+ from_state->transitions_tail, oth, t);
}
+
+/**
+ * Remove a 'transition' from 'state'.
+ *
+ * @param state state from which the to-be-removed transition originates.
+ * @param transition transition that should be removed from state 'state'.
+ */
+static void
+state_remove_transition (struct GNUNET_REGEX_State *state,
+ struct GNUNET_REGEX_Transition *transition)
+{
+ if (NULL == state || NULL == transition)
+ return;
+
+ if (transition->from_state != state)
+ return;
+
+ GNUNET_free_non_null (transition->label);
+
+ state->transition_count--;
+ GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail,
+ transition);
+
+ GNUNET_free (transition);
+}
+
+
/**
* Compare two states. Used for sorting.
*
@@ -493,27 +187,26 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
static int
state_compare (const void *a, const void *b)
{
- struct GNUNET_REGEX_State **s1;
- struct GNUNET_REGEX_State **s2;
-
- s1 = (struct GNUNET_REGEX_State **) a;
- s2 = (struct GNUNET_REGEX_State **) b;
+ struct GNUNET_REGEX_State **s1 = (struct GNUNET_REGEX_State **) a;
+ struct GNUNET_REGEX_State **s2 = (struct GNUNET_REGEX_State **) b;
return (*s1)->id - (*s2)->id;
}
+
/**
* Get all edges leaving state 's'.
*
* @param s state.
- * @param edges all edges leaving 's'.
+ * @param edges all edges leaving 's', expected to be allocated and have enough
+ * space for s->transitions_count elements.
*
* @return number of edges.
*/
static unsigned int
state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
unsigned int count;
if (NULL == s)
@@ -525,7 +218,7 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
{
if (NULL != t->to_state)
{
- edges[count].label = &t->label;
+ edges[count].label = t->label;
edges[count].destination = t->to_state->hash;
count++;
}
@@ -533,39 +226,37 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
return count;
}
+
/**
* Compare to state sets by comparing the id's of the states that are contained
* in each set. Both sets are expected to be sorted by id!
*
* @param sset1 first state set
* @param sset2 second state set
- *
- * @return an integer less than, equal to, or greater than zero
- * if the first argument is considered to be respectively
- * less than, equal to, or greater than the second.
+ * @return 0 if the sets are equal, otherwise non-zero
*/
static int
state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
struct GNUNET_REGEX_StateSet *sset2)
{
int result;
- int i;
+ unsigned int i;
if (NULL == sset1 || NULL == sset2)
return 1;
- result = sset1->len - sset2->len;
-
- for (i = 0; i < sset1->len; i++)
- {
- if (0 != result)
+ result = sset1->off - sset2->off;
+ if (result < 0)
+ return -1;
+ if (result > 0)
+ return 1;
+ for (i = 0; i < sset1->off; i++)
+ if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
break;
-
- result = state_compare (&sset1->states[i], &sset2->states[i]);
- }
return result;
}
+
/**
* Clears the given StateSet 'set'
*
@@ -574,14 +265,11 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
static void
state_set_clear (struct GNUNET_REGEX_StateSet *set)
{
- if (NULL != set)
- {
- if (NULL != set->states)
- GNUNET_free (set->states);
- GNUNET_free (set);
- }
+ GNUNET_array_grow (set->states, set->size, 0);
+ set->off = 0;
}
+
/**
* Clears an automaton fragment. Does not destroy the states inside the
* automaton.
@@ -602,6 +290,7 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
GNUNET_free (a);
}
+
/**
* Frees the memory used by State 's'
*
@@ -610,30 +299,25 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
static void
automaton_destroy_state (struct GNUNET_REGEX_State *s)
{
- struct Transition *t;
- struct Transition *next_t;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *next_t;
if (NULL == s)
return;
- if (NULL != s->name)
- GNUNET_free (s->name);
-
- if (NULL != s->proof)
- GNUNET_free (s->proof);
-
+ GNUNET_free_non_null (s->name);
+ GNUNET_free_non_null (s->proof);
+ state_set_clear (&s->nfa_set);
for (t = s->transitions_head; NULL != t; t = next_t)
{
next_t = t->next;
- GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
- GNUNET_free (t);
+ state_remove_transition (s, t);
}
- state_set_clear (s->nfa_set);
-
GNUNET_free (s);
}
+
/**
* Remove a state from the given automaton 'a'. Always use this function when
* altering the states of an automaton. Will also remove all transitions leading
@@ -646,39 +330,36 @@ static void
automaton_remove_state (struct GNUNET_REGEX_Automaton *a,
struct GNUNET_REGEX_State *s)
{
- struct GNUNET_REGEX_State *ss;
struct GNUNET_REGEX_State *s_check;
- struct Transition *t_check;
+ struct GNUNET_REGEX_Transition *t_check;
+ struct GNUNET_REGEX_Transition *t_check_next;
if (NULL == a || NULL == s)
return;
- // remove state
- ss = s;
- GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
- a->state_count--;
-
- // remove all transitions leading to this state
+ /* remove all transitions leading to this state */
for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
{
for (t_check = s_check->transitions_head; NULL != t_check;
- t_check = t_check->next)
+ t_check = t_check_next)
{
- if (t_check->to_state == ss)
- {
- GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
- s_check->transitions_tail, t_check);
- s_check->transition_count--;
- }
+ t_check_next = t_check->next;
+ if (t_check->to_state == s)
+ state_remove_transition (s_check, t_check);
}
}
- automaton_destroy_state (ss);
+ /* remove state */
+ GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
+ a->state_count--;
+
+ automaton_destroy_state (s);
}
+
/**
* Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
- * 's2'.
+ * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
*
* @param ctx context
* @param a automaton
@@ -692,48 +373,61 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
struct GNUNET_REGEX_State *s2)
{
struct GNUNET_REGEX_State *s_check;
- struct Transition *t_check;
- char *new_name;
-
- GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
+ struct GNUNET_REGEX_Transition *t_check;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *t_next;
+ int is_dup;
if (s1 == s2)
return;
- // 1. Make all transitions pointing to s2 point to s1
+ /* 1. Make all transitions pointing to s2 point to s1, unless this transition
+ * does not already exists, if it already exists remove transition. */
for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
{
- for (t_check = s_check->transitions_head; NULL != t_check;
- t_check = t_check->next)
+ for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
{
+ t_next = t_check->next;
+
if (s2 == t_check->to_state)
- t_check->to_state = s1;
+ {
+ is_dup = GNUNET_NO;
+ for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
+ {
+ if (t->to_state == s1 && 0 == strcmp (t_check->label, t->label))
+ is_dup = GNUNET_YES;
+ }
+ if (GNUNET_NO == is_dup)
+ t_check->to_state = s1;
+ else
+ state_remove_transition (t_check->from_state, t_check);
+ }
}
}
- // 2. Add all transitions from s2 to sX to s1
+ /* 2. Add all transitions from s2 to sX to s1 */
for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
{
if (t_check->to_state != s1)
state_add_transition (ctx, s1, t_check->label, t_check->to_state);
}
- // 3. Rename s1 to {s1,s2}
- new_name = GNUNET_strdup (s1->name);
- if (NULL != s1->name)
- {
- GNUNET_free (s1->name);
- s1->name = NULL;
- }
+ /* 3. Rename s1 to {s1,s2} */
+#if REGEX_DEBUG_DFA
+ char *new_name;
+
+ new_name = s1->name;
GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
GNUNET_free (new_name);
+#endif
- // remove state
+ /* remove state */
GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
a->state_count--;
automaton_destroy_state (s2);
}
+
/**
* Add a state to the automaton 'a', always use this function to alter the
* states DLL of the automaton.
@@ -749,154 +443,1316 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a,
a->state_count++;
}
-/**
- * Function that is called with each state, when traversing an automaton.
- *
- * @param cls closure
- * @param s state
- */
-typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
- struct GNUNET_REGEX_State * s);
/**
- * Traverses all states that are reachable from state 's'. Expects the states to
- * be unmarked (s->marked == GNUNET_NO). Performs 'action' on each visited
- * state.
+ * Depth-first traversal (DFS) of all states that are reachable from state
+ * 's'. Performs 'action' on each visited state.
*
- * @param cls closure.
* @param s start state.
+ * @param marks an array of size a->state_count to remember which state was
+ * already visited.
+ * @param count current count of the state.
+ * @param check function that is checked before advancing on each transition
+ * in the DFS.
+ * @param check_cls closure for check.
* @param action action to be performed on each state.
+ * @param action_cls closure for action.
*/
static void
-automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s,
- GNUNET_REGEX_traverse_action action)
+automaton_state_traverse (struct GNUNET_REGEX_State *s, int *marks,
+ unsigned int *count,
+ GNUNET_REGEX_traverse_check check, void *check_cls,
+ GNUNET_REGEX_traverse_action action, void *action_cls)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
+
+ if (GNUNET_YES == marks[s->traversal_id])
+ return;
- if (GNUNET_NO == s->marked)
+ marks[s->traversal_id] = GNUNET_YES;
+
+ if (NULL != action)
+ action (action_cls, *count, s);
+
+ (*count)++;
+
+ for (t = s->transitions_head; NULL != t; t = t->next)
{
- s->marked = GNUNET_YES;
+ if (NULL == check ||
+ (NULL != check && GNUNET_YES == check (check_cls, s, t)))
+ {
+ automaton_state_traverse (t->to_state, marks, count, check, check_cls,
+ action, action_cls);
+ }
+ }
+}
- if (action > 0)
- action (cls, s);
- for (t = s->transitions_head; NULL != t; t = t->next)
- automaton_state_traverse (cls, t->to_state, action);
+/**
+ * Traverses the given automaton using depth-first-search (DFS) from it's start
+ * state, visiting all reachable states and calling 'action' on each one of
+ * them.
+ *
+ * @param a automaton to be traversed.
+ * @param start start state, pass a->start or NULL to traverse the whole automaton.
+ * @param check function that is checked before advancing on each transition
+ * in the DFS.
+ * @param check_cls closure for check.
+ * @param action action to be performed on each state.
+ * @param action_cls closure for action
+ */
+void
+GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
+ struct GNUNET_REGEX_State *start,
+ GNUNET_REGEX_traverse_check check,
+ void *check_cls,
+ GNUNET_REGEX_traverse_action action,
+ void *action_cls)
+{
+ unsigned int count;
+ struct GNUNET_REGEX_State *s;
+
+ if (NULL == a || 0 == a->state_count)
+ return;
+
+ int marks[a->state_count];
+
+ for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
+ s = s->next, count++)
+ {
+ s->traversal_id = count;
+ marks[s->traversal_id] = GNUNET_NO;
}
+
+ count = 0;
+
+ if (NULL == start)
+ s = a->start;
+ else
+ s = start;
+
+ automaton_state_traverse (s, marks, &count, check, check_cls, action,
+ action_cls);
}
+
/**
- * Traverses the given automaton from it's start state, visiting all reachable
- * states and calling 'action' on each one of them.
+ * String container for faster string operations.
+ */
+struct StringBuffer
+{
+ /**
+ * Buffer holding the string (may start in the middle!);
+ * NOT 0-terminated!
+ */
+ char *sbuf;
+
+ /**
+ * Allocated buffer.
+ */
+ char *abuf;
+
+ /**
+ * Length of the string in the buffer.
+ */
+ size_t slen;
+
+ /**
+ * Number of bytes allocated for 'sbuf'
+ */
+ unsigned int blen;
+
+ /**
+ * Buffer currently represents "NULL" (not the empty string!)
+ */
+ int16_t null_flag;
+
+ /**
+ * If this entry is part of the last/current generation array,
+ * this flag is GNUNET_YES if the last and current generation are
+ * identical (and thus copying is unnecessary if the value didn't
+ * change). This is used in an optimization that improves
+ * performance by about 1% --- if we use int16_t here. With just
+ * "int" for both flags, performance drops (on my system) significantly,
+ * most likely due to increased cache misses.
+ */
+ int16_t synced;
+
+};
+
+
+/**
+ * Compare two strings for equality. If either is NULL they are not equal.
*
- * @param cls closure.
- * @param a automaton.
- * @param action action to be performed on each state.
+ * @param s1 first string for comparison.
+ * @param s2 second string for comparison.
+ *
+ * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
+ */
+static int
+sb_nullstrcmp (const struct StringBuffer *s1,
+ const struct StringBuffer *s2)
+{
+ if ( (GNUNET_YES == s1->null_flag) &&
+ (GNUNET_YES == s2->null_flag) )
+ return 0;
+ if ( (GNUNET_YES == s1->null_flag) ||
+ (GNUNET_YES == s2->null_flag) )
+ return -1;
+ if (s1->slen != s2->slen)
+ return -1;
+ return memcmp (s1->sbuf, s2->sbuf, s1->slen);
+}
+
+
+/**
+ * Compare two strings for equality.
+ *
+ * @param s1 first string for comparison.
+ * @param s2 second string for comparison.
+ *
+ * @return 0 if the strings are the same, 1 or -1 if not.
+ */
+static int
+sb_strcmp (const struct StringBuffer *s1,
+ const struct StringBuffer *s2)
+{
+ if (s1->slen != s2->slen)
+ return -1;
+ return memcmp (s1->sbuf, s2->sbuf, s1->slen);
+}
+
+
+/**
+ * Reallocate the buffer of 'ret' to fit 'nlen' characters;
+ * move the existing string to the beginning of the new buffer.
+ *
+ * @param ret current buffer, to be updated
+ * @param nlen target length for the buffer, must be at least ret->slen
*/
static void
-automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a,
- GNUNET_REGEX_traverse_action action)
+sb_realloc (struct StringBuffer *ret,
+ size_t nlen)
{
- struct GNUNET_REGEX_State *s;
+ char *old;
+
+ GNUNET_assert (nlen >= ret->slen);
+ old = ret->abuf;
+ ret->abuf = GNUNET_malloc (nlen);
+ ret->blen = nlen;
+ memcpy (ret->abuf,
+ ret->sbuf,
+ ret->slen);
+ ret->sbuf = ret->abuf;
+ GNUNET_free_non_null (old);
+}
+
- for (s = a->states_head; NULL != s; s = s->next)
- s->marked = GNUNET_NO;
+/**
+ * Append a string.
+ *
+ * @param ret where to write the result
+ * @param sarg string to append
+ */
+static void
+sb_append (struct StringBuffer *ret,
+ const struct StringBuffer *sarg)
+{
+ if (GNUNET_YES == ret->null_flag)
+ ret->slen = 0;
+ ret->null_flag = GNUNET_NO;
+ if (ret->blen < sarg->slen + ret->slen)
+ sb_realloc (ret, ret->blen + sarg->slen + 128);
+ memcpy (&ret->sbuf[ret->slen],
+ sarg->sbuf,
+ sarg->slen);
+ ret->slen += sarg->slen;
+}
+
- automaton_state_traverse (cls, a->start, action);
+/**
+ * Append a C string.
+ *
+ * @param ret where to write the result
+ * @param cstr string to append
+ */
+static void
+sb_append_cstr (struct StringBuffer *ret,
+ const char *cstr)
+{
+ size_t cstr_len = strlen (cstr);
+
+ if (GNUNET_YES == ret->null_flag)
+ ret->slen = 0;
+ ret->null_flag = GNUNET_NO;
+ if (ret->blen < cstr_len + ret->slen)
+ sb_realloc (ret, ret->blen + cstr_len + 128);
+ memcpy (&ret->sbuf[ret->slen],
+ cstr,
+ cstr_len);
+ ret->slen += cstr_len;
+}
+
+
+/**
+ * Wrap a string buffer, that is, set ret to the format string
+ * which contains an "%s" which is to be replaced with the original
+ * content of 'ret'. Note that optimizing this function is not
+ * really worth it, it is rarely called.
+ *
+ * @param ret where to write the result and take the input for %.*s from
+ * @param format format string, fprintf-style, with exactly one "%.*s"
+ * @param extra_chars how long will the result be, in addition to 'sarg' length
+ */
+static void
+sb_wrap (struct StringBuffer *ret,
+ const char *format,
+ size_t extra_chars)
+{
+ char *temp;
+
+ if (GNUNET_YES == ret->null_flag)
+ ret->slen = 0;
+ ret->null_flag = GNUNET_NO;
+ temp = GNUNET_malloc (ret->slen + extra_chars + 1);
+ GNUNET_snprintf (temp,
+ ret->slen + extra_chars + 1,
+ format,
+ (int) ret->slen,
+ ret->sbuf);
+ GNUNET_free_non_null (ret->abuf);
+ ret->abuf = temp;
+ ret->sbuf = temp;
+ ret->blen = ret->slen + extra_chars + 1;
+ ret->slen = ret->slen + extra_chars;
}
+
/**
- * Reverses all transitions of the given automaton.
+ * Format a string buffer. Note that optimizing this function is not
+ * really worth it, it is rarely called.
*
- * @param a automaton.
+ * @param ret where to write the result
+ * @param format format string, fprintf-style, with exactly one "%.*s"
+ * @param extra_chars how long will the result be, in addition to 'sarg' length
+ * @param sarg string to print into the format
*/
static void
-automaton_reverse (struct GNUNET_REGEX_Automaton *a)
+sb_printf1 (struct StringBuffer *ret,
+ const char *format,
+ size_t extra_chars,
+ const struct StringBuffer *sarg)
{
- struct GNUNET_REGEX_State *s;
- struct Transition *t;
- struct Transition *t_next;
- struct GNUNET_REGEX_State *s_swp;
+ if (ret->blen < sarg->slen + extra_chars + 1)
+ sb_realloc (ret,
+ sarg->slen + extra_chars + 1);
+ ret->null_flag = GNUNET_NO;
+ ret->sbuf = ret->abuf;
+ ret->slen = sarg->slen + extra_chars;
+ GNUNET_snprintf (ret->sbuf,
+ ret->blen,
+ format,
+ (int) sarg->slen,
+ sarg->sbuf);
+}
- for (s = a->states_head; NULL != s; s = s->next)
- for (t = s->transitions_head; NULL != t; t = t->next)
- t->mark = GNUNET_NO;
- for (s = a->states_head; NULL != s; s = s->next)
+/**
+ * Format a string buffer.
+ *
+ * @param ret where to write the result
+ * @param format format string, fprintf-style, with exactly two "%.*s"
+ * @param extra_chars how long will the result be, in addition to 'sarg1/2' length
+ * @param sarg1 first string to print into the format
+ * @param sarg2 second string to print into the format
+ */
+static void
+sb_printf2 (struct StringBuffer *ret,
+ const char *format,
+ size_t extra_chars,
+ const struct StringBuffer *sarg1,
+ const struct StringBuffer *sarg2)
+{
+ if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
+ sb_realloc (ret,
+ sarg1->slen + sarg2->slen + extra_chars + 1);
+ ret->null_flag = GNUNET_NO;
+ ret->slen