aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex.c
diff options
context:
space:
mode:
authorszengel <szengel@140774ce-b5e7-0310-ab8b-a85725594a96>2012-09-20 21:46:06 +0000
committerszengel <szengel@140774ce-b5e7-0310-ab8b-a85725594a96>2012-09-20 21:46:06 +0000
commit0dc00ea8713aefb73eb055947a1c3b93ec0ec1a0 (patch)
treeb5e67562a94cd3a7b3564908f222f0d6b420efbe /src/regex/regex.c
parenta284c47e8ba1225fe636c2ea6622bfb39bd2cf89 (diff)
optimizations
git-svn-id: https://gnunet.org/svn/gnunet@23927 140774ce-b5e7-0310-ab8b-a85725594a96
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r--src/regex/regex.c75
1 files changed, 54 insertions, 21 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index d659cb949e..a5d84ce956 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -642,13 +642,19 @@ GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
const unsigned int stride_len)
{
struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL };
+ struct GNUNET_REGEX_State *s;
+ struct GNUNET_REGEX_State *s_next;
struct GNUNET_REGEX_Transition *t;
struct GNUNET_REGEX_Transition *t_next;
if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
return;
- // Compute the new transitions.
+ // Unmark all states
+ for (s = dfa->states_head; NULL != s; s = s->next)
+ s->marked = GNUNET_NO;
+
+ // Compute the new transitions of given stride_len
GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL,
&add_multi_strides_to_dfa, &ctx);
@@ -662,6 +668,15 @@ GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
GNUNET_free (t);
}
+ // Remove marked states (including their incoming and outgoing transitions)
+ for (s = dfa->states_head; NULL != s; s = s_next)
+ {
+ s_next = s->next;
+ if (GNUNET_YES == s->marked)
+ automaton_remove_state (dfa, s);
+ }
+
+ // Mark this automaton as multistrided
dfa->is_multistrided = GNUNET_YES;
}
@@ -1425,36 +1440,47 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
/**
- * Move from the given state 's' to the next state on transition 'label'
+ * Move from the given state 's' to the next state on transition 'str'. Consumes
+ * as much of the given 'str' as possible (usefull for strided DFAs). On return
+ * 's' will point to the next state, and the length of the substring used for
+ * this transition will be returned. If no transition possible 0 is returned and
+ * 's' points to NULL.
*
- * @param s starting state
- * @param label edge label to follow
+ * @param s starting state, will point to the next state or NULL (if no
+ * transition possible)
+ * @param str edge label to follow (will match longest common prefix)
*
- * @return new state or NULL, if transition on label not possible
+ * @return length of the substring comsumed from 'str'
*/
-static struct GNUNET_REGEX_State *
-dfa_move (struct GNUNET_REGEX_State *s, const char *label)
+static unsigned int
+dfa_move (struct GNUNET_REGEX_State **s, const char *str)
{
struct GNUNET_REGEX_Transition *t;
struct GNUNET_REGEX_State *new_s;
+ unsigned int len;
+ unsigned int max_len;
if (NULL == s)
- return NULL;
+ return 0;
new_s = NULL;
-
- for (t = s->transitions_head; NULL != t; t = t->next)
+ max_len = 0;
+ for (t = (*s)->transitions_head; NULL != t; t = t->next)
{
- // TODO: Use strstr to match substring and return number of char's that have
- // been consumed'
- if (0 == strcmp (label, t->label))
+ len = strlen (t->label);
+
+ if (0 == strncmp (t->label, str, len))
{
- new_s = t->to_state;
- break;
+ if (len >= max_len)
+ {
+ max_len = len;
+ new_s = t->to_state;
+ }
}
}
- return new_s;
+ *s = new_s;
+ return max_len;
}
/**
@@ -2142,6 +2168,14 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
int atomcount;
} *p;
+ if (NULL == regex || 0 == strlen (regex) || 0 == len)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not parse regex. Empty regex string provided.\n");
+
+ return NULL;
+ }
+
GNUNET_REGEX_context_init (&ctx);
regexp = regex;
@@ -2439,8 +2473,8 @@ static int
evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
{
const char *strp;
- char str[2];
struct GNUNET_REGEX_State *s;
+ unsigned int step_len;
if (DFA != a->type)
{
@@ -2455,11 +2489,10 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
if ((NULL == string || 0 == strlen (string)) && s->accepting)
return 0;
- str[1] = '\0';
- for (strp = string; NULL != strp && *strp; strp++)
+ for (strp = string; NULL != strp && *strp; strp += step_len)
{
- str[0] = *strp;
- s = dfa_move (s, str);
+ step_len = dfa_move (&s, strp);
+
if (NULL == s)
break;
}