diff options
author | szengel <szengel@140774ce-b5e7-0310-ab8b-a85725594a96> | 2012-09-24 19:11:42 +0000 |
---|---|---|
committer | szengel <szengel@140774ce-b5e7-0310-ab8b-a85725594a96> | 2012-09-24 19:11:42 +0000 |
commit | a7abaced3ce7588f411ecd5feff14faccf24c928 (patch) | |
tree | 48f0ae9c9918dc0ebc45c73956b529b370c919f9 /src/regex | |
parent | 2d47c30eddc8d0033e8e2b95fe0a8606de416abf (diff) |
regex: iteration improvements/fixes
git-svn-id: https://gnunet.org/svn/gnunet@23982 140774ce-b5e7-0310-ab8b-a85725594a96
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/regex.c | 188 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 165 |
2 files changed, 203 insertions, 150 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 580e9a65f5..a4126e02dd 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c @@ -86,7 +86,6 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_State *from_state, const char *label, struct GNUNET_REGEX_State *to_state) { - int is_dup; struct GNUNET_REGEX_Transition *t; struct GNUNET_REGEX_Transition *oth; @@ -97,20 +96,13 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, } // Do not add duplicate state transitions - is_dup = GNUNET_NO; for (t = from_state->transitions_head; NULL != t; t = t->next) { if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) && t->from_state == from_state) - { - is_dup = GNUNET_YES; - break; - } + return; } - if (GNUNET_YES == is_dup) - return; - // sort transitions by label for (oth = from_state->transitions_head; NULL != oth; oth = oth->next) { @@ -151,10 +143,11 @@ state_remove_transition (struct GNUNET_REGEX_State *state, if (transition->from_state != state) return; + GNUNET_free_non_null (transition->label); + state->transition_count--; GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail, transition); - GNUNET_free_non_null (transition->label); GNUNET_free (transition); } @@ -257,11 +250,12 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, static void state_set_clear (struct GNUNET_REGEX_StateSet *set) { - if (NULL != set) - { - GNUNET_free_non_null (set->states); - GNUNET_free (set); - } + if (NULL == set) + return; + + if (set->len > 0) + GNUNET_array_grow (set->states, set->len, 0); + GNUNET_free (set); } @@ -302,17 +296,14 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s) GNUNET_free_non_null (s->name); GNUNET_free_non_null (s->proof); + state_set_clear (s->nfa_set); for (t = s->transitions_head; NULL != t; t = next_t) { next_t = t->next; - GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); - GNUNET_free_non_null (t->label); - GNUNET_free (t); + state_remove_transition (s, t); } - state_set_clear (s->nfa_set); - GNUNET_free (s); } @@ -329,34 +320,30 @@ static void automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct GNUNET_REGEX_State *s) { - struct GNUNET_REGEX_State *ss; struct GNUNET_REGEX_State *s_check; struct GNUNET_REGEX_Transition *t_check; + struct GNUNET_REGEX_Transition *t_check_next; if (NULL == a || NULL == s) return; - // remove state - ss = s; - GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); - a->state_count--; - // remove all transitions leading to this state for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) { for (t_check = s_check->transitions_head; NULL != t_check; - t_check = t_check->next) + t_check = t_check_next) { - if (t_check->to_state == ss) - { - GNUNET_CONTAINER_DLL_remove (s_check->transitions_head, - s_check->transitions_tail, t_check); - s_check->transition_count--; - } + t_check_next = t_check->next; + if (t_check->to_state == s) + state_remove_transition (s_check, t_check); } } - automaton_destroy_state (ss); + // remove state + GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); + a->state_count--; + + automaton_destroy_state (s); } @@ -1703,8 +1690,6 @@ dfa_compress_paths_helper (struct GNUNET_REGEX_State *start, t->from_state = start; GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t); - GNUNET_free_non_null (label); - if (GNUNET_NO == cur->marked) { dfa_compress_paths_helper (cur, cur, NULL, transitions_head, @@ -1733,6 +1718,7 @@ dfa_compress_paths_helper (struct GNUNET_REGEX_State *start, dfa_compress_paths_helper (start, t->to_state, new_label, transitions_head, transitions_tail); } + GNUNET_free (new_label); } } @@ -2563,11 +2549,11 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) GNUNET_free_non_null (a->regex); GNUNET_free_non_null (a->canonical_regex); - for (s = a->states_head; NULL != s;) + for (s = a->states_head; NULL != s; s = next_state) { next_state = s->next; + GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); automaton_destroy_state (s); - s = next_state; } GNUNET_free (a); @@ -2815,7 +2801,6 @@ GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key) * * @param min_len minimum length of the path in the graph. * @param max_len maximum length of the path in the graph. - * @param cur_len current length of the path already traversed. * @param consumed_string string consumed by traversing the graph till this state. * @param state current state of the automaton. * @param iterator iterator function called for each edge. @@ -2823,8 +2808,7 @@ GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key) */ static void iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, - unsigned int cur_len, char *consumed_string, - struct GNUNET_REGEX_State *state, + char *consumed_string, struct GNUNET_REGEX_State *state, GNUNET_REGEX_KeyIterator iterator, void *iterator_cls) { unsigned int i; @@ -2834,22 +2818,56 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, struct GNUNET_REGEX_Edge edges[num_edges]; struct GNUNET_HashCode hash; - if (cur_len > min_len && NULL != consumed_string && cur_len <= max_len) + unsigned int cur_len; + + if (NULL != consumed_string) + cur_len = strlen (consumed_string); + else + cur_len = 0; + + if (cur_len > min_len && NULL != consumed_string) { - for (i = 0, t = state->transitions_head; NULL != t; t = t->next, i++) + + if (cur_len <= max_len) { - edges[i].label = t->label; - edges[i].destination = t->to_state->hash; - } + for (i = 0, t = state->transitions_head; NULL != t && i < num_edges; + t = t->next, i++) + { + edges[i].label = t->label; + edges[i].destination = t->to_state->hash; + } - GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); - iterator (iterator_cls, &hash, consumed_string, state->accepting, num_edges, - edges); + GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); + iterator (iterator_cls, &hash, consumed_string, state->accepting, + num_edges, edges); + + // Special case for regex consisting of just a string that is shorter than max_len + if (GNUNET_YES == state->accepting && cur_len > 1 && + state->transition_count < 1) + { + edges[0].label = &consumed_string[cur_len - 1]; + edges[0].destination = state->hash; + temp = GNUNET_strdup (consumed_string); + temp[cur_len - 1] = '\0'; + GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash); + iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edges); + GNUNET_free (temp); + } + } + else + { + edges[0].label = &consumed_string[max_len]; + edges[0].destination = state->hash; + temp = GNUNET_strdup (consumed_string); + temp[max_len] = '\0'; + GNUNET_CRYPTO_hash (temp, max_len, &hash); + iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edges); + GNUNET_free (temp); + } } if (cur_len < max_len) { - cur_len++; for (t = state->transitions_head; NULL != t; t = t->next) { if (NULL != consumed_string) @@ -2857,8 +2875,8 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, else GNUNET_asprintf (&temp, "%s", t->label); - iterate_initial_edge (min_len, max_len, cur_len, temp, t->to_state, - iterator, iterator_cls); + iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator, + iterator_cls); GNUNET_free (temp); } } @@ -2866,69 +2884,8 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, /** - * Iterate over all initial edges that aren't actually part of the automaton. - * This is needed to find the initial states returned by - * GNUNET_REGEX_get_first_key. Iteration will start at the first state that has - * more than one outgoing edge, i.e. the state that branches the graph. - * For example consider the following graph: - * a -> b -> c -> d -> ... - * \-> e -> ... - * - * This function will not iterate over the edges leading to "c", because these - * will be covered by the iterate_edges function. - * - * @param a the automaton for which the initial states should be computed. - * @param initial_len length of the initial state string. - * @param iterator iterator function called for each edge. - * @param iterator_cls closure for the iterator function. - */ -void -iterate_initial_edges (struct GNUNET_REGEX_Automaton *a, - const unsigned int initial_len, - GNUNET_REGEX_KeyIterator iterator, void *iterator_cls) -{ - char *consumed_string; - char *temp; - struct GNUNET_REGEX_State *s; - unsigned int cur_len; - - if (1 > initial_len) - return; - - consumed_string = NULL; - s = a->start; - cur_len = 0; - - if (1 == s->transition_count) - { - do - { - if (NULL != consumed_string) - { - temp = consumed_string; - GNUNET_asprintf (&consumed_string, "%s%s", consumed_string, - s->transitions_head->label); - GNUNET_free (temp); - } - else - GNUNET_asprintf (&consumed_string, "%s", s->transitions_head->label); - - s = s->transitions_head->to_state; - cur_len += strlen (s->transitions_head->label); - } - while (cur_len < initial_len && 1 == s->transition_count); - } - - iterate_initial_edge (cur_len, initial_len, cur_len, consumed_string, s, - iterator, iterator_cls); - - GNUNET_free_non_null (consumed_string); -} - - -/** * Iterate over all edges helper function starting from state 's', calling - * iterator function for each edge. + * iterator function for each edge if the automaton. * * @param s state. * @param iterator iterator function called for each edge. @@ -2976,6 +2933,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, for (s = a->states_head; NULL != s; s = s->next) s->marked = GNUNET_NO; - iterate_initial_edges (a, INITIAL_BITS, iterator, iterator_cls); + iterate_initial_edge (0, INITIAL_BITS, NULL, a->start, iterator, + iterator_cls); iterate_edge (a->start, iterator, iterator_cls); } diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index b8f3cd2668..84bb6e9fbf 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c @@ -28,13 +28,25 @@ #include "gnunet_regex_lib.h" #include "regex_internal.h" +#define GNUNET_REGEX_ITERATE_SAVE_DEBUG_GRAPH GNUNET_NO + static unsigned int transition_counter; struct IteratorContext { int error; int should_save_graph; - FILE *graph_file; + FILE *graph_filep; + unsigned int string_count; + char *const *strings; + unsigned int match_count; +}; + +struct RegexStringPair +{ + char *regex; + unsigned int string_count; + char *strings[20]; }; void @@ -44,21 +56,41 @@ key_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof, { unsigned int i; struct IteratorContext *ctx = cls; + char *out_str; + char *state_id = GNUNET_strdup (GNUNET_h2s (key)); - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n", - accepting); + if (GNUNET_YES == ctx->should_save_graph) + { + if (GNUNET_YES == accepting) + GNUNET_asprintf (&out_str, "\"%s\" [shape=doublecircle]\n", state_id); + else + GNUNET_asprintf (&out_str, "\"%s\" [shape=circle]\n", state_id); + fwrite (out_str, strlen (out_str), 1, ctx->graph_filep); + GNUNET_free (out_str); + + for (i = 0; i < num_edges; i++) + { + transition_counter++; + GNUNET_asprintf (&out_str, "\"%s\" -> \"%s\" [label = \"%s (%s)\"]\n", + state_id, GNUNET_h2s (&edges[i].destination), + edges[i].label, proof); + fwrite (out_str, strlen (out_str), 1, ctx->graph_filep); - if (NULL != proof) - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Proof: %s\n", proof); + GNUNET_free (out_str); + } + } + else + { + for (i = 0; i < num_edges; i++) + transition_counter++; + } - if (NULL != key) - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Hash: %s\n", GNUNET_h2s (key)); + GNUNET_free (state_id); - for (i = 0; i < num_edges; i++) + for (i = 0; i < ctx->string_count; i++) { - transition_counter++; - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: Label: %s Destination: %s\n", - i, edges[i].label, GNUNET_h2s (&edges[i].destination)); + if (0 == strcmp (proof, ctx->strings[i])) + ctx->match_count++; } ctx->error += (GNUNET_OK == GNUNET_REGEX_check_proof (proof, key)) ? 0 : 1; @@ -80,49 +112,112 @@ main (int argc, char *argv[]) unsigned int i; unsigned int num_transitions; struct IteratorContext ctx = { 0, 0, NULL }; + char *filename = NULL; error = 0; - const char *regex[17] = { - "ab(c|d)+c*(a(b|c)+d)+(bla)+", - "(bla)*", - "b(lab)*la", - "(ab)*", - "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", - "z(abc|def)?xyz", - "1*0(0|1)*", - "a*b*", - "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", - "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", - "abc(1|0)*def", - "ab|ac", - "(ab)(ab)*", - "ab|cd|ef|gh", - "a|b|c|d|e|f|g", - "(ab)|(ac)", - "x*|(0|1|2)(a|b|c|d)" + const struct RegexStringPair rxstr[10] = { + {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 2, {"abcdcdca", "abcabdbl"}}, + {"abcdefghijklmnop*qst", 1, {"abcdefgh"}}, + {"VPN-4-1(0|1)*", 2, {"VPN-4-10", "VPN-4-11"}}, + {"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 4, + {"aaaaaaaa", "aaXXyyyc", "p", "Y"}}, + {"a*", 8, + {"a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa", "aaaaaaa", "aaaaaaaa"}}, + {"xzxzxzxzxz", 1, {"xzxzxzxz"}}, + {"xyz*", 2, {"xy", "xyz"}}, + {"ab", 1, {"a"}}, + {"abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", 2, {"abcd:000", "abcd:101"}}, + {"x*|(0|1|2)(a|b|c|d)", 2, {"xxxxxxxx", "0a"}} }; - for (i = 0; i < 17; i++) + const char *graph_start_str = "digraph G {\nrankdir=LR\n"; + const char *graph_end_str = "\n}\n"; + + for (i = 0; i < 10; i++) { + // Create graph + if (GNUNET_YES == GNUNET_REGEX_ITERATE_SAVE_DEBUG_GRAPH) + { + GNUNET_asprintf (&filename, "iteration_graph_%u.dot", i); + ctx.graph_filep = fopen (filename, "w"); + if (NULL == ctx.graph_filep) + { + GNUNET_log (GNUNET_ERROR_TYPE_WARNING, + "Could not open file %s for saving iteration graph.\n", + filename); + ctx.should_save_graph = GNUNET_NO; + } + else + { + ctx.should_save_graph = GNUNET_YES; + fwrite (graph_start_str, strlen (graph_start_str), 1, ctx.graph_filep); + } + GNUNET_free (filename); + } + else + { + ctx.should_save_graph = GNUNET_NO; + } + + // Iterate over DFA edges transition_counter = 0; - dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); + ctx.string_count = rxstr[i].string_count; + ctx.strings = rxstr[i].strings; + ctx.match_count = 0; + dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); num_transitions = GNUNET_REGEX_get_transition_count (dfa); - if (transition_counter != num_transitions) + + if (transition_counter < num_transitions) { - GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton has %d transitions, iterated over %d transitions\n", num_transitions, transition_counter); + error += 1; + break; + } + + if (ctx.match_count < ctx.string_count) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Missing initial states for regex %s\n", rxstr[i].regex); + error += (ctx.string_count - ctx.match_count); } + else if (ctx.match_count > ctx.string_count) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Doublicate initial transitions for regex %s\n", + rxstr[i].regex); + error += (ctx.string_count - ctx.match_count); + } + GNUNET_REGEX_automaton_destroy (dfa); + + // Finish graph + if (GNUNET_YES == ctx.should_save_graph) + { + fwrite (graph_end_str, strlen (graph_end_str), 1, ctx.graph_filep); + fclose (ctx.graph_filep); + ctx.graph_filep = NULL; + ctx.should_save_graph = GNUNET_NO; + } } - for (i = 0; i < 17; i++) + + for (i = 0; i < 10; i++) { - dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); + dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2); GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); + + if (ctx.match_count < ctx.string_count) + { + GNUNET_log (GNUNET_ERROR_TYPE_ERROR, + "Missing initial states for regex %s\n", rxstr[i].regex); + error += (ctx.string_count - ctx.match_count); + } + GNUNET_REGEX_automaton_destroy (dfa); } |