aboutsummaryrefslogtreecommitdiff
path: root/src/regex/test_regex_eval_api.c
diff options
context:
space:
mode:
authorBertrand Marc <beberking@gmail.com>2012-06-06 20:47:48 +0200
committerBertrand Marc <beberking@gmail.com>2012-06-06 20:47:48 +0200
commit740b30688bd745a527f96f9116c19acb3480971a (patch)
tree2709a3f4dba11c174aa9e1ba3612e30c578e76a9 /src/regex/test_regex_eval_api.c
parent2b81464a43485fcc8ce079fafdee7b7a171835f4 (diff)
Imported Upstream version 0.9.3upstream/0.9.3
Diffstat (limited to 'src/regex/test_regex_eval_api.c')
-rw-r--r--src/regex/test_regex_eval_api.c311
1 files changed, 311 insertions, 0 deletions
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c
new file mode 100644
index 0000000..c63e97c
--- /dev/null
+++ b/src/regex/test_regex_eval_api.c
@@ -0,0 +1,311 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file regex/test_regex_eval_api.c
+ * @brief test for regex.c
+ * @author Maximilian Szengel
+ */
+#include <regex.h>
+#include <time.h>
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+
+enum Match_Result
+{
+ match = 0,
+ nomatch = 1
+};
+
+struct Regex_String_Pair
+{
+ char *regex;
+ int string_count;
+ char *strings[20];
+ enum Match_Result expected_results[20];
+};
+
+static const char allowed_literals[] =
+ "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
+
+int
+test_random (unsigned int rx_length, unsigned int max_str_len,
+ unsigned int str_count)
+{
+ int i;
+ int j;
+ int rx_exp;
+ char rand_rx[rx_length + 1];
+ char matching_str[str_count][max_str_len + 1];
+ char *rand_rxp;
+ char *matching_strp;
+ int char_op_switch;
+ int last_was_op;
+ char current_char;
+ int eval;
+ int eval_check;
+ struct GNUNET_REGEX_Automaton *dfa;
+ regex_t rx;
+ regmatch_t matchptr[1];
+ char error[200];
+ int result;
+ unsigned int str_len;
+
+ // At least one string is needed for matching
+ GNUNET_assert (str_count > 0);
+ // The string should be at least as long as the regex itself
+ GNUNET_assert (max_str_len >= rx_length);
+
+ rand_rxp = rand_rx;
+ matching_strp = matching_str[0];
+ current_char = 0;
+ last_was_op = 1;
+
+ // Generate random regex and a string that matches the regex
+ for (i = 0; i < rx_length; i++)
+ {
+ char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
+
+ if (0 == char_op_switch && !last_was_op)
+ {
+ last_was_op = 1;
+ rx_exp = rand () % 4;
+
+ switch (rx_exp)
+ {
+ case 0:
+ current_char = '+';
+ break;
+ case 1:
+ current_char = '*';
+ break;
+ case 2:
+ current_char = '?';
+ break;
+ case 3:
+ if (i < rx_length - 1) // '|' cannot be at the end
+ current_char = '|';
+ else
+ current_char =
+ allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
+ break;
+ }
+ }
+ else
+ {
+ current_char =
+ allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
+ last_was_op = 0;
+ }
+
+ if (current_char != '+' && current_char != '*' && current_char != '?' &&
+ current_char != '|')
+ {
+ *matching_strp = current_char;
+ matching_strp++;
+ }
+
+ *rand_rxp = current_char;
+ rand_rxp++;
+ }
+ *rand_rxp = '\0';
+ *matching_strp = '\0';
+
+ // Generate some random strings for matching...
+ // Start at 1, because the first string is generated above during regex generation
+ for (i = 1; i < str_count; i++)
+ {
+ str_len = rand () % max_str_len;
+ for (j = 0; j < str_len; j++)
+ matching_str[i][j] =
+ allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
+ matching_str[i][str_len] = '\0';
+ }
+
+ // Now match
+ result = 0;
+ for (i = 0; i < str_count; i++)
+ {
+ // Match string using DFA
+ dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
+ if (NULL == dfa)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
+ return -1;
+ }
+
+ eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
+ GNUNET_REGEX_automaton_destroy (dfa);
+
+ // Match string using glibc regex
+ if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not compile regex using regcomp\n");
+ return -1;
+ }
+
+ eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
+ regfree (&rx);
+
+ // We only want to match the whole string, because that's what our DFA does, too.
+ if (eval_check == 0 &&
+ (matchptr[0].rm_so != 0 ||
+ matchptr[0].rm_eo != strlen (matching_str[i])))
+ eval_check = 1;
+
+ // compare result
+ if (eval_check != eval)
+ {
+ regerror (eval_check, &rx, error, sizeof error);
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Unexpected result:\nregex: %s\nstring: %s\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
+ rand_rx, matching_str, eval, eval_check, error);
+ result += 1;
+ }
+ }
+ return result;
+}
+
+int
+test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
+ struct Regex_String_Pair *rxstr)
+{
+ int result;
+ int eval;
+ int eval_check;
+ char error[200];
+ regmatch_t matchptr[1];
+ int i;
+
+ if (NULL == a)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n");
+ return 1;
+ }
+
+ result = 0;
+
+ for (i = 0; i < rxstr->string_count; i++)
+ {
+ eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
+ eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
+
+ // We only want to match the whole string, because that's what our DFA does, too.
+ if (eval_check == 0 &&
+ (matchptr[0].rm_so != 0 ||
+ matchptr[0].rm_eo != strlen (rxstr->strings[i])))
+ eval_check = 1;
+
+ if ((rxstr->expected_results[i] == match && (0 != eval || 0 != eval_check))
+ || (rxstr->expected_results[i] == nomatch &&
+ (0 == eval || 0 == eval_check)))
+ {
+ result = 1;
+ regerror (eval_check, rx, error, sizeof error);
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
+ "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
+ rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
+ eval, eval_check, error, matchptr[0].rm_so,
+ matchptr[0].rm_eo);
+ }
+ }
+ return result;
+}
+
+int
+main (int argc, char *argv[])
+{
+ GNUNET_log_setup ("test-regex",
+#if VERBOSE
+ "DEBUG",
+#else
+ "WARNING",
+#endif
+ NULL);
+
+ struct GNUNET_REGEX_Automaton *a;
+ regex_t rx;
+ int i;
+ int check_nfa;
+ int check_dfa;
+ int check_rand;
+
+ struct Regex_String_Pair rxstr[8] = {
+ {"ab?(abcd)?", 5,
+ {"ababcd", "abab", "aabcd", "a", "abb"},
+ {match, nomatch, match, match, nomatch}},
+ {"ab(c|d)+c*(a(b|c)d)+", 5,
+ {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ "abccccca", "abcdcdcdccdabdabd"},
+ {match, nomatch, match, nomatch, match}},
+ {"ab+c*(a(bx|c)d)+", 5,
+ {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ "abccccca", "abcdcdcdccdabdabd"},
+ {nomatch, nomatch, nomatch, nomatch, nomatch}},
+ {"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1,
+ {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
+ {nomatch}},
+ {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1,
+ {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
+ {nomatch}},
+ {"F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", 1,
+ {"osfjsodfonONONOnosndfsdnfsd"},
+ {nomatch}},
+ {"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1,
+ {"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"},
+ {nomatch}},
+ {"ab(c|d)+c*(a(b|c)d)+", 1,
+ {"abacd"},
+ {nomatch}}
+ };
+
+ check_nfa = 0;
+ check_dfa = 0;
+ check_rand = 0;
+
+ for (i = 0; i < 8; i++)
+ {
+ if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not compile regex using regcomp()\n");
+ return 1;
+ }
+
+ // NFA test
+ a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex));
+ check_nfa += test_automaton (a, &rx, &rxstr[i]);
+ GNUNET_REGEX_automaton_destroy (a);
+
+ // DFA test
+ a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
+ check_dfa += test_automaton (a, &rx, &rxstr[i]);
+ GNUNET_REGEX_automaton_destroy (a);
+
+ regfree (&rx);
+ }
+
+ srand (time (NULL));
+ for (i = 0; i < 150; i++)
+ check_rand += test_random (150, 200, 25);
+
+ return check_nfa + check_dfa + check_rand;
+}