about | summary | refs | log | tree | commit | diff
path: root/src/regex
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex')
-rw-r--r-- src/regex/Makefile.am 155
-rw-r--r-- src/regex/Makefile.in 501
-rw-r--r-- src/regex/gnunet-daemon-regexprofiler.c 472
-rw-r--r-- src/regex/gnunet-regex-profiler.c 1889
-rw-r--r-- src/regex/gnunet-regex-simulation-profiler.c 711
-rw-r--r-- src/regex/perf-regex.c 87
-rw-r--r-- src/regex/plugin_block_regex.c 256
-rw-r--r-- src/regex/regex.c 3373
-rw-r--r-- src/regex/regex_block_lib.c 210
-rw-r--r-- src/regex/regex_block_lib.h 98
-rw-r--r-- src/regex/regex_dht.c 790
-rw-r--r-- src/regex/regex_graph.c 317
-rw-r--r-- src/regex/regex_internal.h 484
-rw-r--r-- src/regex/regex_random.c 170
-rw-r--r-- src/regex/regex_simulation_profiler_test.conf 7
-rw-r--r-- src/regex/regex_test_lib.c 291
-rw-r--r-- src/regex/regex_test_lib.h 80
-rw-r--r-- src/regex/test_regex_eval_api.c 284
-rw-r--r-- src/regex/test_regex_graph_api.c 157
-rw-r--r-- src/regex/test_regex_iptoregex.c 103
-rw-r--r-- src/regex/test_regex_iterate_api.c 229
-rw-r--r-- src/regex/test_regex_proofs.c 171
22 files changed, 9584 insertions, 1251 deletions
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am
index 8c73c60..1203897 100644
--- a/src/regex/Makefile.am
+++ b/src/regex/Makefile.am
@@ -8,35 +8,158 @@ if USE_COVERAGE
AM_CFLAGS = --coverage
endif
-lib_LTLIBRARIES = libgnunetregex.la
+lib_LTLIBRARIES = libgnunetregexblock.la \
+ libgnunetregex.la
+
+
+libgnunetregexblock_la_SOURCES = \
+ regex_block_lib.c regex_block_lib.h
+libgnunetregexblock_la_LIBADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(XLIB) \
+ $(LTLIBINTL)
+libgnunetregexblock_la_LDFLAGS = \
+ $(GN_LIB_LDFLAGS) $(WINFLAGS) \
+ -version-info 1:0:0
libgnunetregex_la_SOURCES = \
- regex.c
+ regex_internal.h regex.c \
+ regex_graph.c regex_random.c \
+ regex_dht.c
libgnunetregex_la_LIBADD = -lm \
- $(top_builddir)/src/util/libgnunetutil.la
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ $(top_builddir)/src/statistics/libgnunetstatistics.la \
+ $(top_builddir)/src/regex/libgnunetregexblock.la
+libgnunetregex_la_DEPENDENCIES = \
+ libgnunetregexblock.la
libgnunetregex_la_LDFLAGS = \
- $(GN_LIB_LDFLAGS) \
- -version-info 0:0:0
+ $(GN_LIB_LDFLAGS) \
+ -version-info 2:0:1
+
+
+plugindir = $(libdir)/gnunet
+
+plugin_LTLIBRARIES = \
+ libgnunet_plugin_block_regex.la
+
+libgnunet_plugin_block_regex_la_SOURCES = \
+ plugin_block_regex.c
+libgnunet_plugin_block_regex_la_LIBADD = \
+ $(top_builddir)/src/regex/libgnunetregexblock.la \
+ $(top_builddir)/src/block/libgnunetblock.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+libgnunet_plugin_block_regex_la_LDFLAGS = \
+ $(GN_PLUGIN_LDFLAGS)
+libgnunet_plugin_block_regex_la_DEPENDENCIES = \
+ libgnunetregexblock.la
+
+if HAVE_MYSQL
+noinst_mysql_progs = \
+gnunet-regex-simulation-profiler
+
+gnunet_regex_simulation_profiler_SOURCES = \
+ gnunet-regex-simulation-profiler.c
+gnunet_regex_simulation_profiler_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/mysql/libgnunetmysql.la
+gnunet_regex_simulation_profiler_DEPENDENCIES = \
+ libgnunetregex.la
+endif
+
+noinst_LTLIBRARIES = libgnunetregextest.la
+
+libgnunetregextest_la_SOURCES = \
+ regex_test_lib.c regex_test_lib.h
+libgnunetregextest_la_LIBADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la
+libgnunetregextest_la_DEPENDENCIES = \
+ libgnunetregex.la
+
+
+noinst_PROGRAMS = $(noinst_mysql_progs) \
+ perf-regex \
+ gnunet-regex-profiler \
+ gnunet-daemon-regexprofiler
+
+perf_regex_SOURCES = \
+ perf-regex.c
+perf_regex_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/regex/libgnunetregextest.la
+perf_regex_DEPENDENCIES = \
+ libgnunetregex.la \
+ libgnunetregextest.la
+
+gnunet_regex_profiler_SOURCES = \
+ gnunet-regex-profiler.c
+gnunet_regex_profiler_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ $(top_builddir)/src/testbed/libgnunettestbed.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/statistics/libgnunetstatistics.la
+gnunet_regex_profiler_DEPENDENCIES = \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ libgnunetregex.la
+
+gnunet_daemon_regexprofiler_SOURCES = \
+ gnunet-daemon-regexprofiler.c
+gnunet_daemon_regexprofiler_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/statistics/libgnunetstatistics.la
+gnunet_daemon_regexprofiler_DEPENDENCIES = \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ libgnunetregex.la
+
+
check_PROGRAMS = \
- test_regex_eval_api \
- test_regex_iterate_api
+ test_regex_eval_api \
+ test_regex_iterate_api \
+ test_regex_proofs \
+ test_regex_graph_api \
+ test_regex_iptoregex
if ENABLE_TEST_RUN
-TESTS = $(check_PROGRAMS)
+ TESTS = $(check_PROGRAMS)
endif
test_regex_eval_api_SOURCES = \
- test_regex_eval_api.c
+ test_regex_eval_api.c
test_regex_eval_api_LDADD = \
- $(top_builddir)/src/regex/libgnunetregex.la \
- $(top_builddir)/src/util/libgnunetutil.la
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
test_regex_iterate_api_SOURCES = \
- test_regex_iterate_api.c
+ test_regex_iterate_api.c
test_regex_iterate_api_LDADD = \
- $(top_builddir)/src/regex/libgnunetregex.la \
- $(top_builddir)/src/util/libgnunetutil.la
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+test_regex_proofs_SOURCES = \
+ test_regex_proofs.c
+test_regex_proofs_LDADD = \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+test_regex_graph_api_SOURCES = \
+ test_regex_graph_api.c
+test_regex_graph_api_LDADD = \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+test_regex_iptoregex_SOURCES = \
+ test_regex_iptoregex.c
+test_regex_iptoregex_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la
+
-EXTRA_DIST =
-# test_regex_data.conf
+EXTRA_DIST = \
+ regex_simulation_profiler_test.conf
diff --git a/src/regex/Makefile.in b/src/regex/Makefile.in
index c0c0704..fffad51 100644
--- a/src/regex/Makefile.in
+++ b/src/regex/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -15,7 +15,25 @@
@SET_MAKE@
+
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@@ -35,21 +53,26 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
+noinst_PROGRAMS = $(am__EXEEXT_1) perf-regex$(EXEEXT) \
+ gnunet-regex-profiler$(EXEEXT) \
+ gnunet-daemon-regexprofiler$(EXEEXT)
check_PROGRAMS = test_regex_eval_api$(EXEEXT) \
- test_regex_iterate_api$(EXEEXT)
+ test_regex_iterate_api$(EXEEXT) test_regex_proofs$(EXEEXT) \
+ test_regex_graph_api$(EXEEXT) test_regex_iptoregex$(EXEEXT)
subdir = src/regex
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/absolute-header.m4 \
$(top_srcdir)/m4/align.m4 $(top_srcdir)/m4/argz.m4 \
- $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/iconv.m4 \
- $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
- $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libcurl.m4 \
- $(top_srcdir)/m4/libgcrypt.m4 $(top_srcdir)/m4/libtool.m4 \
- $(top_srcdir)/m4/libunistring.m4 $(top_srcdir)/m4/ltdl.m4 \
- $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
- $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
- $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glib-2.0.m4 \
+ $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/lib-ld.m4 \
+ $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
+ $(top_srcdir)/m4/libcurl.m4 $(top_srcdir)/m4/libgcrypt.m4 \
+ $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/libunistring.m4 \
+ $(top_srcdir)/m4/ltdl.m4 $(top_srcdir)/m4/ltoptions.m4 \
+ $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
+ $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/nls.m4 \
+ $(top_srcdir)/m4/pkg.m4 $(top_srcdir)/m4/po.m4 \
$(top_srcdir)/m4/progtest.m4 $(top_srcdir)/acinclude.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
@@ -79,29 +102,86 @@ am__nobase_list = $(am__nobase_strip_setup); \
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__installdirs = "$(DESTDIR)$(libdir)"
-LTLIBRARIES = $(lib_LTLIBRARIES)
-libgnunetregex_la_DEPENDENCIES = \
- $(top_builddir)/src/util/libgnunetutil.la
-am_libgnunetregex_la_OBJECTS = regex.lo
-libgnunetregex_la_OBJECTS = $(am_libgnunetregex_la_OBJECTS)
-AM_V_lt = $(am__v_lt_$(V))
-am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY))
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(plugindir)"
+LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) \
+ $(plugin_LTLIBRARIES)
+am_libgnunet_plugin_block_regex_la_OBJECTS = plugin_block_regex.lo
+libgnunet_plugin_block_regex_la_OBJECTS = \
+ $(am_libgnunet_plugin_block_regex_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
+libgnunet_plugin_block_regex_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) \
+ $(libgnunet_plugin_block_regex_la_LDFLAGS) $(LDFLAGS) -o $@
+am_libgnunetregex_la_OBJECTS = regex.lo regex_graph.lo regex_random.lo \
+ regex_dht.lo
+libgnunetregex_la_OBJECTS = $(am_libgnunetregex_la_OBJECTS)
libgnunetregex_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(AM_CFLAGS) $(CFLAGS) $(libgnunetregex_la_LDFLAGS) $(LDFLAGS) \
-o $@
+am__DEPENDENCIES_1 =
+libgnunetregexblock_la_DEPENDENCIES = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(am__DEPENDENCIES_1)
+am_libgnunetregexblock_la_OBJECTS = regex_block_lib.lo
+libgnunetregexblock_la_OBJECTS = $(am_libgnunetregexblock_la_OBJECTS)
+libgnunetregexblock_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(libgnunetregexblock_la_LDFLAGS) \
+ $(LDFLAGS) -o $@
+am_libgnunetregextest_la_OBJECTS = regex_test_lib.lo
+libgnunetregextest_la_OBJECTS = $(am_libgnunetregextest_la_OBJECTS)
+@HAVE_MYSQL_TRUE@am__EXEEXT_1 = \
+@HAVE_MYSQL_TRUE@ gnunet-regex-simulation-profiler$(EXEEXT)
+PROGRAMS = $(noinst_PROGRAMS)
+am_gnunet_daemon_regexprofiler_OBJECTS = \
+ gnunet-daemon-regexprofiler.$(OBJEXT)
+gnunet_daemon_regexprofiler_OBJECTS = \
+ $(am_gnunet_daemon_regexprofiler_OBJECTS)
+am_gnunet_regex_profiler_OBJECTS = gnunet-regex-profiler.$(OBJEXT)
+gnunet_regex_profiler_OBJECTS = $(am_gnunet_regex_profiler_OBJECTS)
+am__gnunet_regex_simulation_profiler_SOURCES_DIST = \
+ gnunet-regex-simulation-profiler.c
+@HAVE_MYSQL_TRUE@am_gnunet_regex_simulation_profiler_OBJECTS = \
+@HAVE_MYSQL_TRUE@ gnunet-regex-simulation-profiler.$(OBJEXT)
+gnunet_regex_simulation_profiler_OBJECTS = \
+ $(am_gnunet_regex_simulation_profiler_OBJECTS)
+am_perf_regex_OBJECTS = perf-regex.$(OBJEXT)
+perf_regex_OBJECTS = $(am_perf_regex_OBJECTS)
am_test_regex_eval_api_OBJECTS = test_regex_eval_api.$(OBJEXT)
test_regex_eval_api_OBJECTS = $(am_test_regex_eval_api_OBJECTS)
test_regex_eval_api_DEPENDENCIES = \
$(top_builddir)/src/regex/libgnunetregex.la \
$(top_builddir)/src/util/libgnunetutil.la
+am_test_regex_graph_api_OBJECTS = test_regex_graph_api.$(OBJEXT)
+test_regex_graph_api_OBJECTS = $(am_test_regex_graph_api_OBJECTS)
+test_regex_graph_api_DEPENDENCIES = \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+am_test_regex_iptoregex_OBJECTS = test_regex_iptoregex.$(OBJEXT)
+test_regex_iptoregex_OBJECTS = $(am_test_regex_iptoregex_OBJECTS)
+test_regex_iptoregex_DEPENDENCIES = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la
am_test_regex_iterate_api_OBJECTS = test_regex_iterate_api.$(OBJEXT)
test_regex_iterate_api_OBJECTS = $(am_test_regex_iterate_api_OBJECTS)
test_regex_iterate_api_DEPENDENCIES = \
$(top_builddir)/src/regex/libgnunetregex.la \
$(top_builddir)/src/util/libgnunetutil.la
+am_test_regex_proofs_OBJECTS = test_regex_proofs.$(OBJEXT)
+test_regex_proofs_OBJECTS = $(am_test_regex_proofs_OBJECTS)
+test_regex_proofs_DEPENDENCIES = \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
@@ -112,27 +192,47 @@ LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
-AM_V_CC = $(am__v_CC_$(V))
-am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY))
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
-AM_V_at = $(am__v_at_$(V))
-am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY))
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
-AM_V_CCLD = $(am__v_CCLD_$(V))
-am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY))
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
-AM_V_GEN = $(am__v_GEN_$(V))
-am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY))
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
-SOURCES = $(libgnunetregex_la_SOURCES) $(test_regex_eval_api_SOURCES) \
- $(test_regex_iterate_api_SOURCES)
-DIST_SOURCES = $(libgnunetregex_la_SOURCES) \
- $(test_regex_eval_api_SOURCES) \
- $(test_regex_iterate_api_SOURCES)
+SOURCES = $(libgnunet_plugin_block_regex_la_SOURCES) \
+ $(libgnunetregex_la_SOURCES) $(libgnunetregexblock_la_SOURCES) \
+ $(libgnunetregextest_la_SOURCES) \
+ $(gnunet_daemon_regexprofiler_SOURCES) \
+ $(gnunet_regex_profiler_SOURCES) \
+ $(gnunet_regex_simulation_profiler_SOURCES) \
+ $(perf_regex_SOURCES) $(test_regex_eval_api_SOURCES) \
+ $(test_regex_graph_api_SOURCES) \
+ $(test_regex_iptoregex_SOURCES) \
+ $(test_regex_iterate_api_SOURCES) $(test_regex_proofs_SOURCES)
+DIST_SOURCES = $(libgnunet_plugin_block_regex_la_SOURCES) \
+ $(libgnunetregex_la_SOURCES) $(libgnunetregexblock_la_SOURCES) \
+ $(libgnunetregextest_la_SOURCES) \
+ $(gnunet_daemon_regexprofiler_SOURCES) \
+ $(gnunet_regex_profiler_SOURCES) \
+ $(am__gnunet_regex_simulation_profiler_SOURCES_DIST) \
+ $(perf_regex_SOURCES) $(test_regex_eval_api_SOURCES) \
+ $(test_regex_graph_api_SOURCES) \
+ $(test_regex_iptoregex_SOURCES) \
+ $(test_regex_iterate_api_SOURCES) $(test_regex_proofs_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
@@ -173,6 +273,10 @@ EXEEXT = @EXEEXT@
EXT_LIBS = @EXT_LIBS@
EXT_LIB_PATH = @EXT_LIB_PATH@
FGREP = @FGREP@
+GLIB_CFLAGS = @GLIB_CFLAGS@
+GLIB_GENMARSHAL = @GLIB_GENMARSHAL@
+GLIB_LIBS = @GLIB_LIBS@
+GLIB_MKENUMS = @GLIB_MKENUMS@
GMSGFMT = @GMSGFMT@
GMSGFMT_015 = @GMSGFMT_015@
GNUNETDNS_GROUP = @GNUNETDNS_GROUP@
@@ -183,6 +287,7 @@ GN_LIBINTL = @GN_LIBINTL@
GN_LIB_LDFLAGS = @GN_LIB_LDFLAGS@
GN_PLUGIN_LDFLAGS = @GN_PLUGIN_LDFLAGS@
GN_USER_HOME_DIR = @GN_USER_HOME_DIR@
+GOBJECT_QUERY = @GOBJECT_QUERY@
GREP = @GREP@
HAVE_LIBUNISTRING = @HAVE_LIBUNISTRING@
INCLTDL = @INCLTDL@
@@ -205,6 +310,8 @@ LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@
LIBGCRYPT_CFLAGS = @LIBGCRYPT_CFLAGS@
LIBGCRYPT_CONFIG = @LIBGCRYPT_CONFIG@
LIBGCRYPT_LIBS = @LIBGCRYPT_LIBS@
+LIBGTOP_CFLAGS = @LIBGTOP_CFLAGS@
+LIBGTOP_LIBS = @LIBGTOP_LIBS@
LIBICONV = @LIBICONV@
LIBINTL = @LIBINTL@
LIBLTDL = @LIBLTDL@
@@ -226,6 +333,7 @@ LT_CONFIG_H = @LT_CONFIG_H@
LT_DLLOADERS = @LT_DLLOADERS@
LT_DLPREOPEN = @LT_DLPREOPEN@
MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
MONKEYPREFIX = @MONKEYPREFIX@
MSGFMT = @MSGFMT@
@@ -235,6 +343,7 @@ MYSQL_CPPFLAGS = @MYSQL_CPPFLAGS@
MYSQL_LDFLAGS = @MYSQL_LDFLAGS@
NM = @NM@
NMEDIT = @NMEDIT@
+NSS_DIR = @NSS_DIR@
OBJC = @OBJC@
OBJCDEPMODE = @OBJCDEPMODE@
OBJCFLAGS = @OBJCFLAGS@
@@ -250,6 +359,7 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
POSTGRES_CPPFLAGS = @POSTGRES_CPPFLAGS@
POSTGRES_LDFLAGS = @POSTGRES_LDFLAGS@
POSUB = @POSUB@
@@ -281,6 +391,7 @@ abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
@@ -303,6 +414,7 @@ datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
+gitcommand = @gitcommand@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
@@ -316,7 +428,6 @@ libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
-lt_ECHO = @lt_ECHO@
ltdl_LIBOBJS = @ltdl_LIBOBJS@
ltdl_LTLIBOBJS = @ltdl_LTLIBOBJS@
mandir = @mandir@
@@ -334,6 +445,7 @@ sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
subdirs = @subdirs@
+svnversioncommand = @svnversioncommand@
sys_symbol_underscore = @sys_symbol_underscore@
sysconfdir = @sysconfdir@
target = @target@
@@ -347,33 +459,160 @@ top_srcdir = @top_srcdir@
INCLUDES = -I$(top_srcdir)/src/include
@MINGW_TRUE@WINFLAGS = -Wl,--no-undefined -Wl,--export-all-symbols
@USE_COVERAGE_TRUE@AM_CFLAGS = --coverage
-lib_LTLIBRARIES = libgnunetregex.la
+lib_LTLIBRARIES = libgnunetregexblock.la \
+ libgnunetregex.la
+
+libgnunetregexblock_la_SOURCES = \
+ regex_block_lib.c regex_block_lib.h
+
+libgnunetregexblock_la_LIBADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(XLIB) \
+ $(LTLIBINTL)
+
+libgnunetregexblock_la_LDFLAGS = \
+ $(GN_LIB_LDFLAGS) $(WINFLAGS) \
+ -version-info 1:0:0
+
libgnunetregex_la_SOURCES = \
- regex.c
+ regex_internal.h regex.c \
+ regex_graph.c regex_random.c \
+ regex_dht.c
libgnunetregex_la_LIBADD = -lm \
- $(top_builddir)/src/util/libgnunetutil.la
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ $(top_builddir)/src/statistics/libgnunetstatistics.la \
+ $(top_builddir)/src/regex/libgnunetregexblock.la
+
+libgnunetregex_la_DEPENDENCIES = \
+ libgnunetregexblock.la
libgnunetregex_la_LDFLAGS = \
- $(GN_LIB_LDFLAGS) \
- -version-info 0:0:0
+ $(GN_LIB_LDFLAGS) \
+ -version-info 2:0:1
+
+plugindir = $(libdir)/gnunet
+plugin_LTLIBRARIES = \
+ libgnunet_plugin_block_regex.la
+
+libgnunet_plugin_block_regex_la_SOURCES = \
+ plugin_block_regex.c
+
+libgnunet_plugin_block_regex_la_LIBADD = \
+ $(top_builddir)/src/regex/libgnunetregexblock.la \
+ $(top_builddir)/src/block/libgnunetblock.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+libgnunet_plugin_block_regex_la_LDFLAGS = \
+ $(GN_PLUGIN_LDFLAGS)
+
+libgnunet_plugin_block_regex_la_DEPENDENCIES = \
+ libgnunetregexblock.la
+
+@HAVE_MYSQL_TRUE@noinst_mysql_progs = \
+@HAVE_MYSQL_TRUE@gnunet-regex-simulation-profiler
+
+@HAVE_MYSQL_TRUE@gnunet_regex_simulation_profiler_SOURCES = \
+@HAVE_MYSQL_TRUE@ gnunet-regex-simulation-profiler.c
+
+@HAVE_MYSQL_TRUE@gnunet_regex_simulation_profiler_LDADD = \
+@HAVE_MYSQL_TRUE@ $(top_builddir)/src/util/libgnunetutil.la \
+@HAVE_MYSQL_TRUE@ $(top_builddir)/src/regex/libgnunetregex.la \
+@HAVE_MYSQL_TRUE@ $(top_builddir)/src/mysql/libgnunetmysql.la
+
+@HAVE_MYSQL_TRUE@gnunet_regex_simulation_profiler_DEPENDENCIES = \
+@HAVE_MYSQL_TRUE@ libgnunetregex.la
+
+noinst_LTLIBRARIES = libgnunetregextest.la
+libgnunetregextest_la_SOURCES = \
+ regex_test_lib.c regex_test_lib.h
+
+libgnunetregextest_la_LIBADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la
+
+libgnunetregextest_la_DEPENDENCIES = \
+ libgnunetregex.la
+
+perf_regex_SOURCES = \
+ perf-regex.c
+
+perf_regex_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/regex/libgnunetregextest.la
+
+perf_regex_DEPENDENCIES = \
+ libgnunetregex.la \
+ libgnunetregextest.la
+
+gnunet_regex_profiler_SOURCES = \
+ gnunet-regex-profiler.c
+
+gnunet_regex_profiler_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ $(top_builddir)/src/testbed/libgnunettestbed.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/statistics/libgnunetstatistics.la
+
+gnunet_regex_profiler_DEPENDENCIES = \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ libgnunetregex.la
+
+gnunet_daemon_regexprofiler_SOURCES = \
+ gnunet-daemon-regexprofiler.c
+
+gnunet_daemon_regexprofiler_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/statistics/libgnunetstatistics.la
+
+gnunet_daemon_regexprofiler_DEPENDENCIES = \
+ $(top_builddir)/src/dht/libgnunetdht.la \
+ libgnunetregex.la
@ENABLE_TEST_RUN_TRUE@TESTS = $(check_PROGRAMS)
test_regex_eval_api_SOURCES = \
- test_regex_eval_api.c
+ test_regex_eval_api.c
test_regex_eval_api_LDADD = \
- $(top_builddir)/src/regex/libgnunetregex.la \
- $(top_builddir)/src/util/libgnunetutil.la
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
test_regex_iterate_api_SOURCES = \
- test_regex_iterate_api.c
+ test_regex_iterate_api.c
test_regex_iterate_api_LDADD = \
- $(top_builddir)/src/regex/libgnunetregex.la \
- $(top_builddir)/src/util/libgnunetutil.la
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+test_regex_proofs_SOURCES = \
+ test_regex_proofs.c
+
+test_regex_proofs_LDADD = \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+test_regex_graph_api_SOURCES = \
+ test_regex_graph_api.c
+
+test_regex_graph_api_LDADD = \
+ $(top_builddir)/src/regex/libgnunetregex.la \
+ $(top_builddir)/src/util/libgnunetutil.la
+
+test_regex_iptoregex_SOURCES = \
+ test_regex_iptoregex.c
+
+test_regex_iptoregex_LDADD = \
+ $(top_builddir)/src/util/libgnunetutil.la \
+ $(top_builddir)/src/regex/libgnunetregex.la
+
+EXTRA_DIST = \
+ regex_simulation_profiler_test.conf
-EXTRA_DIST =
all: all-am
.SUFFIXES:
@@ -410,7 +649,6 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
- test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
@@ -418,6 +656,8 @@ install-libLTLIBRARIES: $(lib_LTLIBRARIES)
else :; fi; \
done; \
test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
}
@@ -439,8 +679,55 @@ clean-libLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libgnunetregex.la: $(libgnunetregex_la_OBJECTS) $(libgnunetregex_la_DEPENDENCIES)
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+install-pluginLTLIBRARIES: $(plugin_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(plugin_LTLIBRARIES)'; test -n "$(plugindir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(plugindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(plugindir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(plugindir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(plugindir)"; \
+ }
+
+uninstall-pluginLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(plugin_LTLIBRARIES)'; test -n "$(plugindir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(plugindir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(plugindir)/$$f"; \
+ done
+
+clean-pluginLTLIBRARIES:
+ -test -z "$(plugin_LTLIBRARIES)" || rm -f $(plugin_LTLIBRARIES)
+ @list='$(plugin_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libgnunet_plugin_block_regex.la: $(libgnunet_plugin_block_regex_la_OBJECTS) $(libgnunet_plugin_block_regex_la_DEPENDENCIES) $(EXTRA_libgnunet_plugin_block_regex_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libgnunet_plugin_block_regex_la_LINK) -rpath $(plugindir) $(libgnunet_plugin_block_regex_la_OBJECTS) $(libgnunet_plugin_block_regex_la_LIBADD) $(LIBS)
+libgnunetregex.la: $(libgnunetregex_la_OBJECTS) $(libgnunetregex_la_DEPENDENCIES) $(EXTRA_libgnunetregex_la_DEPENDENCIES)
$(AM_V_CCLD)$(libgnunetregex_la_LINK) -rpath $(libdir) $(libgnunetregex_la_OBJECTS) $(libgnunetregex_la_LIBADD) $(LIBS)
+libgnunetregexblock.la: $(libgnunetregexblock_la_OBJECTS) $(libgnunetregexblock_la_DEPENDENCIES) $(EXTRA_libgnunetregexblock_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libgnunetregexblock_la_LINK) -rpath $(libdir) $(libgnunetregexblock_la_OBJECTS) $(libgnunetregexblock_la_LIBADD) $(LIBS)
+libgnunetregextest.la: $(libgnunetregextest_la_OBJECTS) $(libgnunetregextest_la_DEPENDENCIES) $(EXTRA_libgnunetregextest_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(LINK) $(libgnunetregextest_la_OBJECTS) $(libgnunetregextest_la_LIBADD) $(LIBS)
clean-checkPROGRAMS:
@list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
@@ -450,12 +737,42 @@ clean-checkPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-test_regex_eval_api$(EXEEXT): $(test_regex_eval_api_OBJECTS) $(test_regex_eval_api_DEPENDENCIES)
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+gnunet-daemon-regexprofiler$(EXEEXT): $(gnunet_daemon_regexprofiler_OBJECTS) $(gnunet_daemon_regexprofiler_DEPENDENCIES) $(EXTRA_gnunet_daemon_regexprofiler_DEPENDENCIES)
+ @rm -f gnunet-daemon-regexprofiler$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(gnunet_daemon_regexprofiler_OBJECTS) $(gnunet_daemon_regexprofiler_LDADD) $(LIBS)
+gnunet-regex-profiler$(EXEEXT): $(gnunet_regex_profiler_OBJECTS) $(gnunet_regex_profiler_DEPENDENCIES) $(EXTRA_gnunet_regex_profiler_DEPENDENCIES)
+ @rm -f gnunet-regex-profiler$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(gnunet_regex_profiler_OBJECTS) $(gnunet_regex_profiler_LDADD) $(LIBS)
+gnunet-regex-simulation-profiler$(EXEEXT): $(gnunet_regex_simulation_profiler_OBJECTS) $(gnunet_regex_simulation_profiler_DEPENDENCIES) $(EXTRA_gnunet_regex_simulation_profiler_DEPENDENCIES)
+ @rm -f gnunet-regex-simulation-profiler$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(gnunet_regex_simulation_profiler_OBJECTS) $(gnunet_regex_simulation_profiler_LDADD) $(LIBS)
+perf-regex$(EXEEXT): $(perf_regex_OBJECTS) $(perf_regex_DEPENDENCIES) $(EXTRA_perf_regex_DEPENDENCIES)
+ @rm -f perf-regex$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(perf_regex_OBJECTS) $(perf_regex_LDADD) $(LIBS)
+test_regex_eval_api$(EXEEXT): $(test_regex_eval_api_OBJECTS) $(test_regex_eval_api_DEPENDENCIES) $(EXTRA_test_regex_eval_api_DEPENDENCIES)
@rm -f test_regex_eval_api$(EXEEXT)
$(AM_V_CCLD)$(LINK) $(test_regex_eval_api_OBJECTS) $(test_regex_eval_api_LDADD) $(LIBS)
-test_regex_iterate_api$(EXEEXT): $(test_regex_iterate_api_OBJECTS) $(test_regex_iterate_api_DEPENDENCIES)
+test_regex_graph_api$(EXEEXT): $(test_regex_graph_api_OBJECTS) $(test_regex_graph_api_DEPENDENCIES) $(EXTRA_test_regex_graph_api_DEPENDENCIES)
+ @rm -f test_regex_graph_api$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(test_regex_graph_api_OBJECTS) $(test_regex_graph_api_LDADD) $(LIBS)
+test_regex_iptoregex$(EXEEXT): $(test_regex_iptoregex_OBJECTS) $(test_regex_iptoregex_DEPENDENCIES) $(EXTRA_test_regex_iptoregex_DEPENDENCIES)
+ @rm -f test_regex_iptoregex$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(test_regex_iptoregex_OBJECTS) $(test_regex_iptoregex_LDADD) $(LIBS)
+test_regex_iterate_api$(EXEEXT): $(test_regex_iterate_api_OBJECTS) $(test_regex_iterate_api_DEPENDENCIES) $(EXTRA_test_regex_iterate_api_DEPENDENCIES)
@rm -f test_regex_iterate_api$(EXEEXT)
$(AM_V_CCLD)$(LINK) $(test_regex_iterate_api_OBJECTS) $(test_regex_iterate_api_LDADD) $(LIBS)
+test_regex_proofs$(EXEEXT): $(test_regex_proofs_OBJECTS) $(test_regex_proofs_DEPENDENCIES) $(EXTRA_test_regex_proofs_DEPENDENCIES)
+ @rm -f test_regex_proofs$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(test_regex_proofs_OBJECTS) $(test_regex_proofs_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -463,33 +780,43 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gnunet-daemon-regexprofiler.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gnunet-regex-profiler.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gnunet-regex-simulation-profiler.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/perf-regex.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plugin_block_regex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_block_lib.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_dht.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_graph.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_random.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex_test_lib.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_regex_eval_api.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_regex_graph_api.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_regex_iptoregex.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_regex_iterate_api.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_regex_proofs.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c $<
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
-@am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
@@ -630,14 +957,15 @@ check-TESTS: $(TESTS)
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
@@ -675,9 +1003,9 @@ check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
$(MAKE) $(AM_MAKEFLAGS) check-TESTS
check: check-am
-all-am: Makefile $(LTLIBRARIES)
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
installdirs:
- for dir in "$(DESTDIR)$(libdir)"; do \
+ for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(plugindir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
@@ -690,10 +1018,15 @@ install-am: all-am
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
@@ -708,7 +1041,8 @@ maintainer-clean-generic:
clean: clean-am
clean-am: clean-checkPROGRAMS clean-generic clean-libLTLIBRARIES \
- clean-libtool mostlyclean-am
+ clean-libtool clean-noinstLTLIBRARIES clean-noinstPROGRAMS \
+ clean-pluginLTLIBRARIES mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
@@ -728,7 +1062,7 @@ info: info-am
info-am:
-install-data-am:
+install-data-am: install-pluginLTLIBRARIES
install-dvi: install-dvi-am
@@ -774,26 +1108,27 @@ ps: ps-am
ps-am:
-uninstall-am: uninstall-libLTLIBRARIES
+uninstall-am: uninstall-libLTLIBRARIES uninstall-pluginLTLIBRARIES
.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libLTLIBRARIES \
- clean-libtool ctags distclean distclean-compile \
+ clean-libtool clean-noinstLTLIBRARIES clean-noinstPROGRAMS \
+ clean-pluginLTLIBRARIES ctags distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
install-data install-data-am install-dvi install-dvi-am \
install-exec install-exec-am install-html install-html-am \
install-info install-info-am install-libLTLIBRARIES \
- install-man install-pdf install-pdf-am install-ps \
- install-ps-am install-strip installcheck installcheck-am \
- installdirs maintainer-clean maintainer-clean-generic \
- mostlyclean mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am uninstall-libLTLIBRARIES
-
-# test_regex_data.conf
+ install-man install-pdf install-pdf-am \
+ install-pluginLTLIBRARIES install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am \
+ uninstall-libLTLIBRARIES uninstall-pluginLTLIBRARIES
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/src/regex/gnunet-daemon-regexprofiler.c b/src/regex/gnunet-daemon-regexprofiler.c
new file mode 100644
index 0000000..591cda3
--- /dev/null
+++ b/src/regex/gnunet-daemon-regexprofiler.c
@@ -0,0 +1,472 @@
+/*
+ This file is part of GNUnet.
+ (C) 2012, 2013 Christian Grothoff
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @file regex/gnunet-daemon-regexprofiler.c
+ * @brief daemon that uses mesh to announce a regular expression. Used in
+ * conjunction with gnunet-regex-profiler to announce regexes on several peers
+ * without the need to explicitly connect to the mesh service running on the
+ * peer from within the profiler.
+ * @author Maximilian Szengel
+ * @author Bartlomiej Polot
+ */
+#include "platform.h"
+#include "gnunet_util_lib.h"
+#include "gnunet_regex_lib.h"
+#include "gnunet_dht_service.h"
+#include "gnunet_statistics_service.h"
+
+/**
+ * Return value from 'main'; set to GNUNET_SYSERR by 'run' on fatal
+ * configuration or start-up errors.
+ */
+static int global_ret;
+
+/**
+ * Configuration we use (stored by 'run').
+ */
+static const struct GNUNET_CONFIGURATION_Handle *cfg;
+
+/**
+ * Handle to the statistics service.
+ */
+static struct GNUNET_STATISTICS_Handle *stats_handle;
+
+/**
+ * Peer's dht handle.
+ */
+static struct GNUNET_DHT_Handle *dht_handle;
+
+/**
+ * Peer's regex announce handle; NULL until the first announcement was made.
+ */
+static struct GNUNET_REGEX_announce_handle *announce_handle;
+
+/**
+ * Hostkey generation context; reset to NULL once the key callback ran
+ * or the operation was stopped during shutdown.
+ */
+static struct GNUNET_CRYPTO_RsaKeyGenerationContext *keygen;
+
+/**
+ * Task that periodically (re)announces the regex;
+ * GNUNET_SCHEDULER_NO_TASK while none is pending.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier reannounce_task;
+
+/**
+ * How often to reannounce the regex (REGEXPROFILER/REANNOUNCE_FREQ,
+ * 10 minutes if not configured).
+ */
+static struct GNUNET_TIME_Relative reannounce_freq;
+
+/**
+ * Random delay before the first announcement, to spread out load on the DHT.
+ */
+static struct GNUNET_TIME_Relative announce_delay;
+
+/**
+ * Local peer's PeerID (hash of the public hostkey).
+ */
+static struct GNUNET_PeerIdentity my_full_id;
+
+/**
+ * Maximal path compression length for regex announcing
+ * (REGEXPROFILER/MAX_PATH_COMPRESSION).
+ */
+static unsigned long long max_path_compression;
+
+/**
+ * Name of the file containing policies that this peer should announce. One
+ * policy per line.
+ */
+static char * policy_filename;
+
+/**
+ * Prefix to add before every regex we're announcing.
+ */
+static char * regex_prefix;
+
+/**
+ * Regex with prefix: 'regex_prefix' followed by the parenthesised union of
+ * all policies. Built in 'run', consumed in 'key_generation_cb'.
+ */
+static char *rx_with_pfx;
+
+
+/**
+ * Task run during shutdown: stops a pending hostkey operation and releases
+ * the announce, DHT and statistics handles held by the daemon.
+ *
+ * @param cls unused
+ * @param tc unused
+ */
+static void
+shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "shutting down\n");
+
+  if (NULL != keygen)
+  {
+    GNUNET_CRYPTO_rsa_key_create_stop (keygen);
+    keygen = NULL;
+  }
+  if (NULL != announce_handle)
+  {
+    GNUNET_REGEX_announce_cancel (announce_handle);
+    announce_handle = NULL;
+  }
+
+  if (NULL != dht_handle)
+  {
+    GNUNET_DHT_disconnect (dht_handle);
+    dht_handle = NULL;
+  }
+
+  /* The announce handle was created with stats_handle, so the statistics
+   * connection is released only after the announcement was cancelled. */
+  if (NULL != stats_handle)
+  {
+    GNUNET_STATISTICS_destroy (stats_handle, GNUNET_NO);
+    stats_handle = NULL;
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "shut down\n");
+}
+
+
+/**
+ * Scheduler task announcing the regex. The first run creates the
+ * announcement, every later run refreshes the existing one; afterwards the
+ * task re-arms itself after 'reannounce_freq' plus a random number of
+ * seconds.
+ *
+ * @param cls Heap-allocated regex string; released here if the task runs
+ *            because of shutdown, otherwise handed on to the next run.
+ * @param tc TaskContext, inspected for the shutdown reason.
+ */
+static void
+reannounce_regex (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  char *rx = cls;
+  struct GNUNET_TIME_Relative jitter;
+  struct GNUNET_TIME_Relative next_delay;
+
+  reannounce_task = GNUNET_SCHEDULER_NO_TASK;
+  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
+  {
+    GNUNET_free (rx);
+    return;
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n", rx);
+  GNUNET_STATISTICS_update (stats_handle, "# regexes announced", 1, GNUNET_NO);
+  if ((NULL != announce_handle) || (NULL == rx))
+  {
+    /* Refresh path: an announcement must already exist. */
+    GNUNET_assert (NULL != announce_handle);
+    GNUNET_REGEX_reannounce (announce_handle);
+  }
+  else
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                "First time, creating regex: %s\n",
+                rx);
+    announce_handle = GNUNET_REGEX_announce (dht_handle,
+                                             &my_full_id,
+                                             rx,
+                                             (unsigned int) max_path_compression,
+                                             stats_handle);
+  }
+
+  jitter = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS,
+                                          GNUNET_CRYPTO_random_u32 (
+                                            GNUNET_CRYPTO_QUALITY_WEAK,
+                                            600));
+  next_delay = GNUNET_TIME_relative_add (reannounce_freq, jitter);
+  reannounce_task = GNUNET_SCHEDULER_add_delayed (next_delay,
+                                                  &reannounce_regex,
+                                                  cls);
+}
+
+
+/**
+ * Schedule the first announcement of the given regular expression after
+ * 'announce_delay'. The task receives its own copy of the string, so the
+ * caller keeps ownership of 'regex'.
+ *
+ * @param regex regular expression to announce; NULL or empty is rejected
+ *              with an error message.
+ */
+static void
+announce_regex (const char * regex)
+{
+  if ((NULL == regex) || ('\0' == regex[0]))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Cannot announce empty regex\n");
+    return;
+  }
+
+  /* Only one announce task may be pending at any time. */
+  GNUNET_assert (GNUNET_SCHEDULER_NO_TASK == reannounce_task);
+  reannounce_task = GNUNET_SCHEDULER_add_delayed (announce_delay,
+                                                  &reannounce_regex,
+                                                  GNUNET_strdup (regex));
+}
+
+
+/**
+ * Load regular expressions from a policy file and join them into a single
+ * string, one alternative per non-empty line, separated by '|'.
+ *
+ * Newlines and embedded NUL bytes are treated as line separators; empty
+ * lines are skipped. A final line without a trailing newline is included.
+ *
+ * @param filename filename of the file containing the regexes, one per line.
+ * @param rx set to a newly allocated string with the union of all regular
+ *           expressions; only written when the return value is non-zero.
+ *           The caller must free it.
+ *
+ * @return number of regular expressions read from filename, 0 on error or
+ *         if the file holds no policy.
+ * FIXME use load regex lib function
+ */
+static unsigned int
+load_regexes (const char *filename, char **rx)
+{
+  char *data;
+  char *dst;
+  uint64_t filesize;
+  uint64_t offset;
+  unsigned int rx_cnt;
+  int in_line;
+
+  if (GNUNET_YES != GNUNET_DISK_file_test (filename))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Could not find policy file %s\n", filename);
+    return 0;
+  }
+  if (GNUNET_OK != GNUNET_DISK_file_size (filename, &filesize, GNUNET_YES, GNUNET_YES))
+    filesize = 0;
+  if (0 == filesize)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Policy file %s is empty.\n", filename);
+    return 0;
+  }
+  /* One extra byte so that the terminator never overwrites file content. */
+  data = GNUNET_malloc (filesize + 1);
+  if (filesize != GNUNET_DISK_fn_read (filename, data, filesize))
+  {
+    GNUNET_free (data);
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not read policy file %s.\n",
+                filename);
+    return 0;
+  }
+
+  /* Compact the buffer in place. 'dst' never overtakes the read position:
+   * a '|' is only written for a line that was preceded by at least one
+   * separator byte, which was consumed without being copied. */
+  dst = data;
+  rx_cnt = 0;
+  in_line = GNUNET_NO;
+  for (offset = 0; offset < filesize; offset++)
+  {
+    const char c = data[offset];
+
+    if (('\n' == c) || ('\0' == c))
+    {
+      in_line = GNUNET_NO;
+      continue;
+    }
+    if (GNUNET_NO == in_line)
+    {
+      if (0 != rx_cnt)
+        *dst++ = '|';
+      rx_cnt++;
+      in_line = GNUNET_YES;
+    }
+    *dst++ = c;
+  }
+  *dst = '\0';
+
+  if (0 == rx_cnt)
+  {
+    /* Nothing but separators: do not hand out a buffer the caller would
+     * not free on its "no policies" path. */
+    GNUNET_free (data);
+    return 0;
+  }
+  *rx = data;
+
+  return rx_cnt;
+}
+
+
+/**
+ * Callback for hostkey read/generation. Derives the local peer identity
+ * from the key and starts announcing the prefixed regex.
+ *
+ * @param cls Closure (not used).
+ * @param pk The private key of the local peer, NULL on error. Released
+ *           here once the public key has been extracted.
+ * @param emsg Error message if applicable.
+ */
+static void
+key_generation_cb (void *cls,
+                   struct GNUNET_CRYPTO_RsaPrivateKey *pk,
+                   const char *emsg)
+{
+  struct GNUNET_CRYPTO_RsaPublicKeyBinaryEncoded my_public_key;
+
+  /* The generation context is finished once this callback runs. */
+  keygen = NULL;
+  if (NULL == pk)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _("Regexprofiler could not access hostkey: %s. Exiting.\n"),
+                emsg);
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  GNUNET_CRYPTO_rsa_key_get_public (pk, &my_public_key);
+  GNUNET_CRYPTO_hash (&my_public_key, sizeof (my_public_key),
+                      &my_full_id.hashPubKey);
+  /* Only the identity hash is needed from here on; the private key handed
+   * to this callback is ours to release. */
+  GNUNET_CRYPTO_rsa_key_free (pk);
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "Regexprofiler for peer [%s] starting\n",
+              GNUNET_i2s(&my_full_id));
+  /* announce_regex works on its own copy, so the global can be dropped. */
+  announce_regex (rx_with_pfx);
+  GNUNET_free (rx_with_pfx);
+  rx_with_pfx = NULL;
+}
+
+
+/**
+ * @brief Main function that will be run by the scheduler.
+ *
+ * Reads the daemon's settings, connects to the statistics and DHT services,
+ * loads the policy file and starts reading/generating the hostkey. Every
+ * fatal error sets 'global_ret' to GNUNET_SYSERR and triggers shutdown.
+ *
+ * @param cls closure
+ * @param args remaining command-line arguments
+ * @param cfgfile name of the configuration file used (for saving, can be NULL!)
+ * @param cfg_ configuration
+ */
+static void
+run (void *cls, char *const *args GNUNET_UNUSED,
+     const char *cfgfile GNUNET_UNUSED,
+     const struct GNUNET_CONFIGURATION_Handle *cfg_)
+{
+  char *regex = NULL;
+  char *keyfile;
+
+  cfg = cfg_;
+
+  /* Register the clean-up task first so that the early exits below still
+   * release whatever handles were acquired up to that point. */
+  GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL, &shutdown_task,
+                                NULL);
+
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_filename (cfg, "GNUNETD", "HOSTKEY",
+                                               &keyfile))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _("%s service is lacking key configuration settings (%s). Exiting.\n"),
+                "regexprofiler", "hostkey");
+    global_ret = GNUNET_SYSERR;
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_number (cfg, "REGEXPROFILER", "MAX_PATH_COMPRESSION",
+                                             &max_path_compression))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _("%s service is lacking key configuration settings (%s). Exiting.\n"),
+                "regexprofiler", "max_path_compression");
+    GNUNET_free (keyfile);
+    global_ret = GNUNET_SYSERR;
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_filename (cfg, "REGEXPROFILER",
+                                               "POLICY_FILE", &policy_filename))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _("%s service is lacking key configuration settings (%s). Exiting.\n"),
+                "regexprofiler", "policy_file");
+    GNUNET_free (keyfile);
+    global_ret = GNUNET_SYSERR;
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER",
+                                             "REGEX_PREFIX", &regex_prefix))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _("%s service is lacking key configuration settings (%s). Exiting.\n"),
+                "regexprofiler", "regex_prefix");
+    GNUNET_free (keyfile);
+    global_ret = GNUNET_SYSERR;
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  /* The reannounce frequency is optional; fall back to ten minutes. */
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_time (cfg, "REGEXPROFILER",
+                                           "REANNOUNCE_FREQ", &reannounce_freq))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "reannounce_freq not given. Using 10 minutes.\n");
+    reannounce_freq =
+      GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 10);
+  }
+  announce_delay =
+    GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS,
+                                   GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 600));
+
+  stats_handle = GNUNET_STATISTICS_create ("regexprofiler", cfg);
+
+  dht_handle = GNUNET_DHT_connect (cfg, 1);
+
+  if (NULL == dht_handle)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Could not acquire dht handle. Exiting.\n");
+    GNUNET_free (keyfile);
+    global_ret = GNUNET_SYSERR;
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  /* Read regexes from policy files */
+  if (0 == load_regexes (policy_filename, &regex))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Policy file %s contains no policies. Exiting.\n",
+                policy_filename);
+    GNUNET_free (keyfile);
+    global_ret = GNUNET_SYSERR;
+    GNUNET_SCHEDULER_shutdown ();
+    return;
+  }
+
+  /* Announcing regexes from policy_filename */
+  GNUNET_asprintf (&rx_with_pfx, "%s(%s)", regex_prefix, regex);
+  GNUNET_free (regex);
+
+  /* The announcement itself starts from key_generation_cb, once the peer
+   * identity is known. */
+  keygen = GNUNET_CRYPTO_rsa_key_create_start (keyfile,
+                                               &key_generation_cb,
+                                               NULL);
+  GNUNET_free (keyfile);
+}
+
+
+/**
+ * The main function of the regexprofiler service.
+ *
+ * @param argc number of arguments from the command line
+ * @param argv command line arguments
+ * @return 2 if the arguments could not be converted to UTF-8, 1 if
+ *         GNUNET_PROGRAM_run failed, otherwise the value of 'global_ret'
+ */
+int
+main (int argc, char *const *argv)
+{
+  static const struct GNUNET_GETOPT_CommandLineOption options[] = {
+    GNUNET_GETOPT_OPTION_END
+  };
+  int rc;
+
+  if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv))
+    return 2;
+
+  rc = GNUNET_PROGRAM_run (argc, argv, "regexprofiler",
+                           gettext_noop
+                           ("Daemon to announce regular expressions for the peer using mesh."),
+                           options, &run, NULL);
+  if (GNUNET_OK != rc)
+    return 1;
+  return global_ret;
+}
+
+
+#ifdef LINUX
+#include <malloc.h>
+
+/**
+ * MINIMIZE heap size (way below 128k) since this process doesn't need much.
+ *
+ * Declared as a constructor, so it is invoked before 'main'; only compiled
+ * when LINUX is defined.
+ */
+void __attribute__ ((constructor)) GNUNET_ARM_memory_init ()
+{
+ /* Lower the trim threshold and top padding, then trim right away. */
+ mallopt (M_TRIM_THRESHOLD, 4 * 1024);
+ mallopt (M_TOP_PAD, 1 * 1024);
+ malloc_trim (0);
+}
+#endif
+
+
+/* end of gnunet-daemon-regexprofiler.c */
diff --git a/src/regex/gnunet-regex-profiler.c b/src/regex/gnunet-regex-profiler.c
new file mode 100644
index 0000000..f0a05c4
--- /dev/null
+++ b/src/regex/gnunet-regex-profiler.c
@@ -0,0 +1,1889 @@
+/*
+ This file is part of GNUnet.
+ (C) 2011 - 2013 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @file regex/gnunet-regex-profiler.c
+ * @brief Regex profiler for testing distributed regex use.
+ * @author Bartlomiej Polot
+ * @author Maximilian Szengel
+ *
+ */
+
+#include <string.h>
+
+#include "platform.h"
+#include "gnunet_applications.h"
+#include "gnunet_util_lib.h"
+#include "gnunet_regex_lib.h"
+#include "gnunet_dht_service.h"
+#include "gnunet_testbed_service.h"
+
+/**
+ * Element of the doubly-linked list (DLL) of pending testbed operations.
+ */
+struct DLLOperation
+{
+ /**
+ * The testbed operation handle
+ */
+ struct GNUNET_TESTBED_Operation *op;
+
+ /**
+ * Closure
+ */
+ void *cls;
+
+ /**
+ * The next pointer for DLL
+ */
+ struct DLLOperation *next;
+
+ /**
+ * The prev pointer for DLL
+ */
+ struct DLLOperation *prev;
+};
+
+
+/**
+ * Available states during profiling, listed in the order the profiler is
+ * expected to pass through them.
+ */
+enum State
+{
+ /**
+ * Initial state
+ */
+ STATE_INIT = 0,
+
+ /**
+ * Starting slaves
+ */
+ STATE_SLAVES_STARTING,
+
+ /**
+ * Creating peers
+ */
+ STATE_PEERS_CREATING,
+
+ /**
+ * Starting peers
+ */
+ STATE_PEERS_STARTING,
+
+ /**
+ * Linking peers
+ */
+ STATE_PEERS_LINKING,
+
+ /**
+ * Matching strings against announced regexes
+ */
+ STATE_SEARCH_REGEX,
+
+ /**
+ * Destroying peers; we can do this as the controller takes care of stopping a
+ * peer if it is running
+ */
+ STATE_PEERS_DESTROYING
+};
+
+
+/**
+ * Per-peer state kept by the profiler.
+ */
+struct RegexPeer
+{
+ /**
+ * Peer id.
+ */
+ unsigned int id;
+
+ /**
+ * Peer configuration handle.
+ */
+ struct GNUNET_CONFIGURATION_Handle *cfg;
+
+ /**
+ * The actual testbed peer handle.
+ */
+ struct GNUNET_TESTBED_Peer *peer_handle;
+
+ /**
+ * Host on which the peer is running.
+ */
+ struct GNUNET_TESTBED_Host *host_handle;
+
+ /**
+ * Filename of the peer's policy file.
+ */
+ char *policy_file;
+
+ /**
+ * Peer's search string.
+ */
+ const char *search_str;
+
+ /**
+ * Set to GNUNET_YES if the peer successfully matched the above
+ * search string. GNUNET_NO if the string could not be matched
+ * during the profiler run. GNUNET_SYSERR if the string matching
+ * timed out. Undefined if search_str is NULL
+ */
+ int search_str_matched;
+
+ /**
+ * Peer's dht handle.
+ */
+ struct GNUNET_DHT_Handle *dht_handle;
+
+ /**
+ * Handle to a running regex search.
+ */
+ struct GNUNET_REGEX_search_handle *search_handle;
+
+ /**
+ * Testbed operation handle for the dht service.
+ */
+ struct GNUNET_TESTBED_Operation *dht_op_handle;
+
+ /**
+ * Peer's statistics handle.
+ */
+ struct GNUNET_STATISTICS_Handle *stats_handle;
+
+ /**
+ * Testbed operation handle for the statistics service.
+ */
+ struct GNUNET_TESTBED_Operation *stats_op_handle;
+
+ /**
+ * The starting time of a profiling step.
+ */
+ struct GNUNET_TIME_Absolute prof_start_time;
+};
+
+
+/**
+ * An array of hosts loaded from the hostkeys file
+ */
+static struct GNUNET_TESTBED_Host **hosts;
+
+/**
+ * Array of peer handles used to pass to
+ * GNUNET_TESTBED_overlay_configure_topology
+ */
+static struct GNUNET_TESTBED_Peer **peer_handles;
+
+/**
+ * The array of peers; we fill this as the peers are given to us by the testbed
+ */
+static struct RegexPeer *peers;
+
+/**
+ * Host registration handle
+ */
+static struct GNUNET_TESTBED_HostRegistrationHandle *reg_handle;
+
+/**
+ * Handle to the master controller process
+ */
+static struct GNUNET_TESTBED_ControllerProc *mc_proc;
+
+/**
+ * Handle to the master controller
+ */
+static struct GNUNET_TESTBED_Controller *mc;
+
+/**
+ * Handle to global configuration
+ */
+static struct GNUNET_CONFIGURATION_Handle *cfg;
+
+/**
+ * Head of the operations list
+ */
+static struct DLLOperation *dll_op_head;
+
+/**
+ * Tail of the operations list
+ */
+static struct DLLOperation *dll_op_tail;
+
+/**
+ * Peer linking - topology operation
+ */
+static struct GNUNET_TESTBED_Operation *topology_op;
+
+/**
+ * Array with one habitability-check handle per host (indexed like 'hosts');
+ * entries may be NULL.
+ */
+struct GNUNET_TESTBED_HostHabitableCheckHandle **hc_handles;
+
+/**
+ * Abort task identifier
+ */
+static GNUNET_SCHEDULER_TaskIdentifier abort_task;
+
+/**
+ * Shutdown task identifier
+ */
+static GNUNET_SCHEDULER_TaskIdentifier shutdown_task;
+
+/**
+ * Host registration task identifier
+ */
+static GNUNET_SCHEDULER_TaskIdentifier register_hosts_task;
+
+/**
+ * Global event mask for all testbed events
+ */
+static uint64_t event_mask;
+
+/**
+ * The starting time of a profiling step
+ */
+static struct GNUNET_TIME_Absolute prof_start_time;
+
+/**
+ * Duration profiling step has taken
+ */
+static struct GNUNET_TIME_Relative prof_time;
+
+/**
+ * Number of peers to be started by the profiler
+ */
+static unsigned int num_peers;
+
+/**
+ * Number of hosts in the hosts array
+ */
+static unsigned int num_hosts;
+
+/**
+ * Factor of number of links. num_links = num_peers * linking_factor.
+ */
+static unsigned int linking_factor;
+
+/**
+ * Number of random links to be established between peers
+ */
+static unsigned int num_links;
+
+/**
+ * Number of times we try overlay connect operations
+ */
+static unsigned int retry_links;
+
+/**
+ * Continuous failures during overlay connect operations
+ */
+static unsigned int cont_fails;
+
+/**
+ * Global testing status
+ */
+static int result;
+
+/**
+ * current state of profiling
+ */
+enum State state;
+
+/**
+ * Folder where policy files are stored.
+ */
+static char * policy_dir;
+
+/**
+ * Search strings.
+ */
+static char **search_strings;
+
+/**
+ * Number of search strings.
+ */
+static int num_search_strings;
+
+/**
+ * Number of peers found with search strings.
+ */
+static unsigned int peers_found;
+
+/**
+ * Search task identifier
+ */
+static GNUNET_SCHEDULER_TaskIdentifier search_task;
+
+/**
+ * Search timeout task identifier.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier search_timeout_task;
+
+/**
+ * Search timeout.
+ * NOTE(review): initialised to 60000, the same value that 'search_delay'
+ * below documents as one minute, so the stored unit looks like
+ * milliseconds rather than seconds -- confirm.
+ */
+static struct GNUNET_TIME_Relative search_timeout = { 60000 };
+
+/**
+ * How long do we wait before starting the search?
+ * Default: 1 m.
+ */
+static struct GNUNET_TIME_Relative search_delay = { 60000 };
+
+/**
+ * File to log statistics to.
+ */
+static struct GNUNET_DISK_FileHandle *data_file;
+
+/**
+ * Filename to log statistics to.
+ */
+static char *data_filename;
+
+/**
+ * Maximal path compression length.
+ */
+static unsigned int max_path_compression;
+
+/**
+ * If we should distribute the search evenly throughout all peers (each
+ * peer searches for a string) or if only one peer should search for
+ * all strings.
+ */
+static int no_distributed_search;
+
+/**
+ * Prefix used for regex announcing. We need to prefix the search
+ * strings with it, in order to find something.
+ */
+static char * regex_prefix;
+
+/******************************************************************************/
+/****************************** DECLARATIONS ********************************/
+/******************************************************************************/
+
+
+/**
+ * Search callback function.
+ *
+ * @param cls Closure provided in GNUNET_REGEX_search.
+ * @param id Peer providing a regex that matches the string.
+ * @param get_path Path of the get request.
+ * @param get_path_length Length of get_path.
+ * @param put_path Path of the put request.
+ * @param put_path_length Length of the put_path.
+ */
+static void
+regex_found_handler (void *cls,
+ const struct GNUNET_PeerIdentity *id,
+ const struct GNUNET_PeerIdentity *get_path,
+ unsigned int get_path_length,
+ const struct GNUNET_PeerIdentity *put_path,
+ unsigned int put_path_length);
+
+
+/**
+ * DHT connect callback.
+ *
+ * @param cls internal peer id.
+ * @param op operation handle.
+ * @param ca_result connect adapter result.
+ * @param emsg error message.
+ */
+static void
+dht_connect_cb (void *cls, struct GNUNET_TESTBED_Operation *op,
+ void *ca_result, const char *emsg);
+
+/**
+ * DHT connect adapter.
+ *
+ * @param cls not used.
+ * @param cfg configuration handle.
+ *
+ * @return presumably the DHT service handle handed to the connect
+ *         callback as 'ca_result' -- TODO confirm against the definition
+ */
+static void *
+dht_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg);
+
+
+/**
+ * Adapter function called to destroy a connection to
+ * the DHT service
+ *
+ * @param cls closure
+ * @param op_result service handle returned from the connect adapter
+ */
+static void
+dht_da (void *cls, void *op_result);
+
+
+/**
+ * Function called by testbed once we are connected to stats
+ * service. Get the statistics for the services of interest.
+ *
+ * @param cls the 'struct RegexPeer' for which we connected to stats
+ * @param op connect operation handle
+ * @param ca_result handle to stats service
+ * @param emsg error message on failure
+ */
+static void
+stats_connect_cb (void *cls,
+ struct GNUNET_TESTBED_Operation *op,
+ void *ca_result,
+ const char *emsg);
+
+
+/**
+ * Task to collect all statistics from all peers, will shutdown the
+ * profiler, when done.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_collect_stats (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc);
+
+
+/******************************************************************************/
+/******************************** SHUTDOWN **********************************/
+/******************************************************************************/
+
+
+/**
+ * Shutdown nicely: cancel pending tasks and checks, report every peer whose
+ * search string was not matched to 'data_file' (if open), release all
+ * testbed operations, hosts, controller handles and the configuration, and
+ * finally stop the scheduler.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_shutdown (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct DLLOperation *dll_op;
+  struct RegexPeer *peer;
+  unsigned int nhost;
+  unsigned int peer_cnt;
+  unsigned int search_str_cnt;
+  char output_buffer[512];
+  size_t size;
+
+  shutdown_task = GNUNET_SCHEDULER_NO_TASK;
+  if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+    GNUNET_SCHEDULER_cancel (abort_task);
+  if (NULL != hc_handles)
+  {
+    for (nhost = 0; nhost < num_hosts; nhost++)
+      if (NULL != hc_handles[nhost])
+        GNUNET_TESTBED_is_host_habitable_cancel (hc_handles[nhost]);
+    GNUNET_free (hc_handles);
+    hc_handles = NULL;
+  }
+  if (GNUNET_SCHEDULER_NO_TASK != register_hosts_task)
+    GNUNET_SCHEDULER_cancel (register_hosts_task);
+
+  /* 'peers' is a separately allocated array; do not index it if shutdown
+   * runs while it is still NULL. */
+  for (peer_cnt = 0; (NULL != peers) && (peer_cnt < num_peers); peer_cnt++)
+  {
+    peer = &peers[peer_cnt];
+
+    if (GNUNET_YES != peer->search_str_matched && NULL != data_file)
+    {
+      prof_time = GNUNET_TIME_absolute_get_duration (peer->prof_start_time);
+      /* Strings may be NULL for peers without a search string; substitute
+       * the literal instead of passing NULL to %s. */
+      size =
+        GNUNET_snprintf (output_buffer,
+                         sizeof (output_buffer),
+                         "%p Search string not found: %s (%d)\n%p On peer: %u (%p)\n%p With policy file: %s\n%p After: %s\n",
+                         peer,
+                         (NULL != peer->search_str) ? peer->search_str : "(null)",
+                         peer->search_str_matched,
+                         peer, peer->id, peer,
+                         peer,
+                         (NULL != peer->policy_file) ? peer->policy_file : "(null)",
+                         peer,
+                         GNUNET_STRINGS_relative_time_to_string (prof_time,
+                                                                 GNUNET_NO));
+      if (size != GNUNET_DISK_file_write (data_file, output_buffer, size))
+        GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n");
+    }
+
+    if (NULL != peers[peer_cnt].dht_op_handle)
+      GNUNET_TESTBED_operation_done (peers[peer_cnt].dht_op_handle);
+    if (NULL != peers[peer_cnt].stats_op_handle)
+      GNUNET_TESTBED_operation_done (peers[peer_cnt].stats_op_handle);
+  }
+
+  if (NULL != data_file)
+    GNUNET_DISK_file_close (data_file);
+
+  for (search_str_cnt = 0;
+       search_str_cnt < num_search_strings && NULL != search_strings;
+       search_str_cnt++)
+  {
+    GNUNET_free_non_null (search_strings[search_str_cnt]);
+  }
+  GNUNET_free_non_null (search_strings);
+
+  if (NULL != reg_handle)
+    GNUNET_TESTBED_cancel_registration (reg_handle);
+  if (NULL != topology_op)
+    GNUNET_TESTBED_operation_done (topology_op);
+  /* 'hosts' is released with the NULL-tolerant free below, so it must not
+   * be indexed unconditionally either. */
+  if (NULL != hosts)
+  {
+    for (nhost = 0; nhost < num_hosts; nhost++)
+      if (NULL != hosts[nhost])
+        GNUNET_TESTBED_host_destroy (hosts[nhost]);
+  }
+  GNUNET_free_non_null (hosts);
+
+  while (NULL != (dll_op = dll_op_head))
+  {
+    GNUNET_TESTBED_operation_done (dll_op->op);
+    GNUNET_CONTAINER_DLL_remove (dll_op_head, dll_op_tail, dll_op);
+    GNUNET_free (dll_op);
+  }
+  if (NULL != mc)
+    GNUNET_TESTBED_controller_disconnect (mc);
+  if (NULL != mc_proc)
+    GNUNET_TESTBED_controller_stop (mc_proc);
+  if (NULL != cfg)
+    GNUNET_CONFIGURATION_destroy (cfg);
+
+  GNUNET_SCHEDULER_shutdown (); /* Stop scheduler to shutdown testbed run */
+}
+
+
+/**
+ * Abort task, run when the test timed out: marks the run as failed and
+ * replaces any pending shutdown task with an immediate one.
+ *
+ * @param cls integer tag smuggled through the closure pointer; only logged
+ * @param tc the task context
+ */
+static void
+do_abort (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  const unsigned long tag = (unsigned long) cls;
+
+  GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Aborting %lu...\n", tag);
+  result = GNUNET_SYSERR;
+  abort_task = GNUNET_SCHEDULER_NO_TASK;
+  if (GNUNET_SCHEDULER_NO_TASK != shutdown_task)
+  {
+    GNUNET_SCHEDULER_cancel (shutdown_task);
+  }
+  shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+}
+
+
+/******************************************************************************/
+/********************* STATISTICS SERVICE CONNECTIONS ***********************/
+/******************************************************************************/
+
+/**
+ * Connect adapter for the statistics service: opens a statistics handle
+ * under the subsystem name "<driver>" using the peer's configuration.
+ *
+ * @param cls closure
+ * @param cfg configuration of the peer to connect to; will be available until
+ *          GNUNET_TESTBED_operation_done() is called on the operation returned
+ *          from GNUNET_TESTBED_service_connect()
+ * @return service handle to return in 'op_result', NULL on error
+ */
+static void *
+stats_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg)
+{
+  struct GNUNET_STATISTICS_Handle *sh;
+
+  sh = GNUNET_STATISTICS_create ("<driver>", cfg);
+  return sh;
+}
+
+
+/**
+ * Disconnect adapter for the statistics service: destroys the statistics
+ * handle stored in the peer and clears the peer's reference to it.
+ *
+ * @param cls the 'struct RegexPeer' owning the statistics handle
+ * @param op_result service handle returned from the connect adapter; must be
+ *          the handle stored in the peer
+ */
+static void
+stats_da (void *cls, void *op_result)
+{
+  struct RegexPeer *owner = cls;
+
+  GNUNET_assert (owner->stats_handle == op_result);
+  GNUNET_STATISTICS_destroy (owner->stats_handle, GNUNET_NO);
+  owner->stats_handle = NULL;
+}
+
+
+/**
+ * Process statistic values. Write all values to global 'data_file', if
+ * present; otherwise only log them.
+ *
+ * @param cls the 'struct RegexPeer' the statistics belong to (only its
+ *          address is printed, to tag the output lines)
+ * @param subsystem name of subsystem that created the statistic
+ * @param name the name of the datum
+ * @param value the current value
+ * @param is_persistent GNUNET_YES if the value is persistent, GNUNET_NO if not
+ * @return GNUNET_OK to continue, GNUNET_SYSERR to abort iteration
+ *         (this implementation always returns GNUNET_OK)
+ */
+static int
+stats_iterator (void *cls, const char *subsystem, const char *name,
+                uint64_t value, int is_persistent)
+{
+  struct RegexPeer *peer = cls;
+  char output_buffer[512];
+  size_t size;
+
+  if (NULL == data_file)
+  {
+    /* uint64_t is not necessarily 'unsigned long long' and '%p' expects a
+       'void *'; cast explicitly so that the varargs match the format */
+    GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                "%p -> %s [%s]: %llu\n",
+                (void *) peer, subsystem, name, (unsigned long long) value);
+    return GNUNET_OK;
+  }
+  size =
+    GNUNET_snprintf (output_buffer,
+                     sizeof (output_buffer),
+                     "%p [%s] %llu %s\n",
+                     (void *) peer,
+                     subsystem, (unsigned long long) value, name);
+  if (size != GNUNET_DISK_file_write (data_file, output_buffer, size))
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n");
+
+  return GNUNET_OK;
+}
+
+
+/**
+ * Stats callback. Finishes the statistics testbed operation of the peer in
+ * 'cls' and then either connects to the statistics service of the next peer
+ * in the global 'peers' array or, once all peers have been handled,
+ * schedules the shutdown of the profiler.
+ *
+ * @param cls the 'struct RegexPeer' whose statistics were requested
+ * @param success GNUNET_OK if statistics were
+ *        successfully obtained, GNUNET_SYSERR if not.
+ */
+static void
+stats_cb (void *cls,
+          int success)
+{
+  static unsigned int peers_done;
+  struct RegexPeer *current = cls;
+  struct RegexPeer *next;
+
+  if (GNUNET_OK != success)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Getting statistics for peer %u failed!\n",
+                current->id);
+    return;
+  }
+
+  GNUNET_assert (NULL != current->stats_op_handle);
+  GNUNET_TESTBED_operation_done (current->stats_op_handle);
+  current->stats_op_handle = NULL;
+
+  if (++peers_done == num_peers)
+  {
+    /* every peer has been queried: shut down after a short delay */
+    struct GNUNET_TIME_Relative delay = { 100 };
+
+    shutdown_task = GNUNET_SCHEDULER_add_delayed (delay, &do_shutdown, NULL);
+    return;
+  }
+
+  /* continue with the next peer in the array */
+  next = &peers[peers_done];
+  next->stats_op_handle =
+    GNUNET_TESTBED_service_connect (NULL,
+                                    next->peer_handle,
+                                    "statistics",
+                                    &stats_connect_cb,
+                                    next,
+                                    &stats_ca,
+                                    &stats_da,
+                                    next);
+}
+
+
+/**
+ * Function called by testbed once we are connected to stats
+ * service. Requests all statistics of the peer; the values are delivered to
+ * stats_iterator() and completion is signalled to stats_cb().
+ *
+ * @param cls the 'struct RegexPeer' for which we connected to stats
+ * @param op connect operation handle
+ * @param ca_result handle to stats service
+ * @param emsg error message on failure
+ */
+static void
+stats_connect_cb (void *cls,
+                  struct GNUNET_TESTBED_Operation *op,
+                  void *ca_result,
+                  const char *emsg)
+{
+  struct RegexPeer *peer = cls;
+
+  if ((NULL == ca_result) || (NULL != emsg))
+  {
+    /* emsg may be NULL here (failure signalled only through ca_result);
+       never hand a NULL pointer to '%s' */
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Failed to connect to statistics service on peer %u: %s\n",
+                peer->id,
+                (NULL != emsg) ? emsg : "no error message available");
+
+    peer->stats_handle = NULL;
+    return;
+  }
+
+  peer->stats_handle = ca_result;
+
+  if (NULL == GNUNET_STATISTICS_get (peer->stats_handle, NULL, NULL,
+                                     GNUNET_TIME_UNIT_FOREVER_REL,
+                                     &stats_cb,
+                                     &stats_iterator, peer))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Could not get statistics of peer %u!\n", peer->id);
+  }
+}
+
+
+/**
+ * Task that starts the statistics collection by connecting to the
+ * statistics service of the first peer. The remaining peers are visited one
+ * after the other from stats_cb(), which also schedules the shutdown of the
+ * profiler when done.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_collect_stats (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct RegexPeer *first = peers;
+
+  GNUNET_assert (NULL != first->peer_handle);
+  first->stats_op_handle =
+    GNUNET_TESTBED_service_connect (NULL,
+                                    first->peer_handle,
+                                    "statistics",
+                                    &stats_connect_cb,
+                                    first,
+                                    &stats_ca,
+                                    &stats_da,
+                                    first);
+}
+
+
+/******************************************************************************/
+/************************* DHT SERVICE CONNECTIONS **************************/
+/******************************************************************************/
+
+/**
+ * Method called when we've found a peer that announced a regex
+ * that matches our search string. Records the outcome for the searching
+ * peer, releases that peer's DHT operation and, once all search strings
+ * have been accounted for, starts the statistics collection.
+ *
+ * @param cls the 'struct RegexPeer' that issued the search (closure provided
+ *          in GNUNET_REGEX_search).
+ * @param id Peer providing a regex that matches the string; NULL is
+ *          handled as a timed out search.
+ * @param get_path Path of the get request.
+ * @param get_path_length Length of get_path.
+ * @param put_path Path of the put request.
+ * @param put_path_length Length of the put_path.
+ */
+static void
+regex_found_handler (void *cls,
+                     const struct GNUNET_PeerIdentity *id,
+                     const struct GNUNET_PeerIdentity *get_path,
+                     unsigned int get_path_length,
+                     const struct GNUNET_PeerIdentity *put_path,
+                     unsigned int put_path_length)
+{
+  struct RegexPeer *peer = cls;
+  char output_buffer[512];
+  size_t size;
+
+  /* only the first result per peer is counted */
+  if (GNUNET_YES == peer->search_str_matched)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                "String %s on peer %u already matched!\n",
+                peer->search_str, peer->id);
+    return;
+  }
+
+  peers_found++;
+
+  if (NULL == id)
+  {
+    // FIXME not possible right now
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                "String matching timed out for string %s on peer %u (%i/%i)\n",
+                peer->search_str, peer->id, peers_found, num_search_strings);
+
+    printf ("String matching timed out for string %s on peer %u (%i/%i)\n",
+            peer->search_str, peer->id, peers_found, num_search_strings);
+
+    peer->search_str_matched = GNUNET_SYSERR;
+  }
+  else
+  {
+    prof_time = GNUNET_TIME_absolute_get_duration (peer->prof_start_time);
+    GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                "String %s successfully matched on peer %u after %s (%i/%i)\n",
+                peer->search_str, peer->id, GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO),
+                peers_found, num_search_strings);
+
+    printf ("String %s successfully matched on peer %u after %s (%i/%i)\n",
+            peer->search_str, peer->id, GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO),
+            peers_found, num_search_strings);
+    fflush (stdout);
+
+    peer->search_str_matched = GNUNET_YES;
+
+    if (NULL != data_file)
+    {
+      size =
+        GNUNET_snprintf (output_buffer,
+                         sizeof (output_buffer),
+                         "%p Peer: %u\n%p Host: %s\n%p Policy file: %s\n"
+                         "%p Search string: %s\n%p Search duration: %s\n\n",
+                         peer, peer->id,
+                         peer,
+                         GNUNET_TESTBED_host_get_hostname (peer->host_handle),
+                         peer, peer->policy_file,
+                         peer, peer->search_str,
+                         peer,
+                         GNUNET_STRINGS_relative_time_to_string (prof_time,
+                                                                 GNUNET_NO));
+
+      if (size != GNUNET_DISK_file_write (data_file, output_buffer, size))
+        GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n");
+    }
+  }
+
+  /* this peer's search is done; releasing the operation runs dht_da() */
+  GNUNET_TESTBED_operation_done (peer->dht_op_handle);
+  peer->dht_op_handle = NULL;
+
+  if (peers_found == num_search_strings)
+  {
+    prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time);
+    GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                "All strings successfully matched in %s\n",
+                GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO));
+    printf ("All strings successfully matched.\n");
+    fflush (stdout);
+
+    if (GNUNET_SCHEDULER_NO_TASK != search_timeout_task)
+    {
+      GNUNET_SCHEDULER_cancel (search_timeout_task);
+      /* forget the cancelled task so that nobody cancels it a second time */
+      search_timeout_task = GNUNET_SCHEDULER_NO_TASK;
+    }
+
+    printf ("Collecting stats and shutting down.\n");
+    GNUNET_SCHEDULER_add_now (&do_collect_stats, NULL);
+  }
+}
+
+
+/**
+ * Connect by string timeout task. Runs when not all strings were matched
+ * within 'search_timeout': reports how many were found and starts the
+ * statistics collection, which in turn shuts the profiler down.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_connect_by_string_timeout (void *cls,
+                              const struct GNUNET_SCHEDULER_TaskContext * tc)
+{
+  /* this task is running now, so its id must not be cancelled any more */
+  search_timeout_task = GNUNET_SCHEDULER_NO_TASK;
+  /* pending delayed tasks are also run when the scheduler shuts down; do
+     not start collecting statistics in that case (same guard as in
+     find_next_string) */
+  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
+    return;
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "Finding matches to all strings did not succeed after %s.\n",
+              GNUNET_STRINGS_relative_time_to_string (search_timeout, GNUNET_NO));
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "Found %i of %i strings\n", peers_found, num_search_strings);
+
+  printf ("Search timed out after %s. Collecting stats and shutting down.\n",
+          GNUNET_STRINGS_relative_time_to_string (search_timeout, GNUNET_NO));
+  fflush (stdout);
+
+  GNUNET_SCHEDULER_add_now (&do_collect_stats, NULL);
+}
+
+
+/**
+ * Connect by string task that is run to start the string search. It
+ * assigns the first search string to the first peer, connects to that
+ * peer's DHT service (the search itself is started in dht_connect_cb once
+ * the connection is established) and arms the search timeout.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_connect_by_string (void *cls,
+                      const struct GNUNET_SCHEDULER_TaskContext * tc)
+{
+  /* this task is running now, so its id must not be cancelled any more */
+  search_task = GNUNET_SCHEDULER_NO_TASK;
+  /* pending delayed tasks are also run when the scheduler shuts down; do
+     not start a search in that case (same guard as in find_next_string) */
+  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
+    return;
+
+  printf ("Starting string search.\n");
+  fflush (stdout);
+
+  /* NOTE(review): assumes at least one search string was loaded -- confirm
+     that the caller guarantees num_search_strings > 0 */
+  peers[0].search_str = search_strings[0];
+  peers[0].search_str_matched = GNUNET_NO;
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "Searching for string \"%s\" on peer %d with file %s\n",
+              peers[0].search_str, 0, peers[0].policy_file);
+
+  /* First connect to the dht service, then search for the string. The
+     connects for the following peers are scheduled from dht_connect_cb */
+  peers[0].dht_op_handle =
+    GNUNET_TESTBED_service_connect (NULL,
+                                    peers[0].peer_handle,
+                                    "dht",
+                                    &dht_connect_cb,
+                                    &peers[0],
+                                    &dht_ca,
+                                    &dht_da,
+                                    &peers[0]);
+
+  search_timeout_task = GNUNET_SCHEDULER_add_delayed (search_timeout,
+                                                      &do_connect_by_string_timeout, NULL);
+}
+
+/**
+ * Start searching for the next string in the DHT: connects to the DHT
+ * service of the selected peer; the search itself is started in
+ * dht_connect_cb once the connection is established.
+ *
+ * @param cls Index of the next peer in the peers array (an integer passed
+ *          through the closure pointer).
+ * @param tc TaskContext.
+ */
+void
+find_next_string (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  long next_p = (long) cls;
+
+  /* do not start new searches while the scheduler is shutting down */
+  if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
+    return;
+
+  /* next_p is a 'long', so it has to be printed with '%ld' */
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "Searching for string \"%s\" on peer %ld with file %s\n",
+              peers[next_p].search_str, next_p, peers[next_p].policy_file);
+
+  /* FIXME
+   * don't connect to a new dht for each peer, we might want to search for n
+   * strings on m peers where n > m
+   */
+  peers[next_p].dht_op_handle =
+    GNUNET_TESTBED_service_connect (NULL,
+                                    peers[next_p].peer_handle,
+                                    "dht",
+                                    &dht_connect_cb,
+                                    &peers[next_p],
+                                    &dht_ca,
+                                    &dht_da,
+                                    &peers[next_p]);
+}
+
+/**
+ * DHT connect callback. Called when we are connected to the dht service for
+ * the peer in 'cls'. If successful we start the regex search for the search
+ * string of this peer and, while search strings remain, schedule the
+ * connect for the next peer one second later.
+ *
+ * @param cls the 'struct RegexPeer' that was connected.
+ * @param op operation handle.
+ * @param ca_result connect adapter result.
+ * @param emsg error message.
+ */
+static void
+dht_connect_cb (void *cls, struct GNUNET_TESTBED_Operation *op,
+                void *ca_result, const char *emsg)
+{
+  struct RegexPeer *peer = (struct RegexPeer *) cls;
+  static unsigned int peer_cnt;
+  unsigned int next_p;
+
+  if (NULL != emsg || NULL == op || NULL == ca_result)
+  {
+    /* emsg may be NULL when the failure is only visible through op or
+       ca_result; never hand a NULL pointer to '%s' */
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "DHT connect failed: %s\n",
+                (NULL != emsg) ? emsg : "no error message available");
+    GNUNET_abort ();
+  }
+
+  GNUNET_assert (NULL != peer->dht_handle);
+  GNUNET_assert (peer->dht_op_handle == op);
+  GNUNET_assert (peer->dht_handle == ca_result);
+
+  peer->search_str_matched = GNUNET_NO;
+  peer->search_handle = GNUNET_REGEX_search (peer->dht_handle,
+                                             peer->search_str,
+                                             &regex_found_handler, peer,
+                                             NULL);
+  peer->prof_start_time = GNUNET_TIME_absolute_get ();
+
+  /* NOTE(review): peer_cnt is only advanced in the distributed case, so with
+     no_distributed_search this condition never becomes false -- confirm
+     whether that is intended */
+  if (peer_cnt < (num_search_strings - 1))
+  {
+    if (GNUNET_YES == no_distributed_search)
+      next_p = 0;
+    else
+      next_p = (++peer_cnt % num_peers);
+
+    /* NOTE(review): the search string is selected by the peer index, so
+       strings beyond num_peers are never used -- see the FIXME in
+       find_next_string */
+    peers[next_p].search_str = search_strings[next_p];
+    peers[next_p].search_str_matched = GNUNET_NO;
+
+    /* Don't start all searches at once */
+    /* TODO add some intelligence to the timeout */
+    GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_SECONDS,
+                                  &find_next_string,
+                                  (void *) (long) next_p);
+  }
+}
+
+
+/**
+ * DHT connect adapter. Opens a connection to the dht service and remembers
+ * the handle in the peer.
+ *
+ * @param cls Closure (the 'struct RegexPeer' to connect).
+ * @param cfg Configuration handle.
+ *
+ * @return the DHT handle that was stored in the peer (the result of
+ *         GNUNET_DHT_connect)
+ */
+static void *
+dht_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg)
+{
+  struct RegexPeer *owner = cls;
+
+  return (owner->dht_handle = GNUNET_DHT_connect (cfg, 32));
+}
+
+
+/**
+ * Adapter function called to destroy a connection to the dht service.
+ * Cancels a search that is still running on the connection before the
+ * connection itself is closed.
+ *
+ * @param cls Closure (peer).
+ * @param op_result Service handle returned from the connect adapter; must be
+ *          the DHT handle stored in the peer.
+ */
+static void
+dht_da (void *cls, void *op_result)
+{
+  struct RegexPeer *owner = cls;
+
+  GNUNET_assert (op_result == owner->dht_handle);
+
+  /* stop the search first, it uses the DHT connection */
+  if (owner->search_handle != NULL)
+  {
+    GNUNET_REGEX_search_cancel (owner->search_handle);
+    owner->search_handle = NULL;
+  }
+  if (owner->dht_handle == NULL)
+    return;
+  GNUNET_DHT_disconnect (owner->dht_handle);
+  owner->dht_handle = NULL;
+}
+
+
+/******************************************************************************/
+/*************************** TESTBED PEER SETUP *****************************/
+/******************************************************************************/
+
+
+/**
+ * Configure the peer overlay topology: requests an Erdos-Renyi overlay
+ * with 'linking_factor' links between all peers and restarts the profiling
+ * timer used to measure the linking phase.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_configure_topology (void *cls,
+                       const struct GNUNET_SCHEDULER_TaskContext * tc)
+{
+  /* the linking factor is used directly as the number of links */
+  num_links = linking_factor;
+
+  /* Do overlay connect */
+  prof_start_time = GNUNET_TIME_absolute_get ();
+  topology_op =
+    GNUNET_TESTBED_overlay_configure_topology (NULL, num_peers, peer_handles,
+                                               NULL,
+                                               NULL,
+                                               NULL,
+                                               GNUNET_TESTBED_TOPOLOGY_ERDOS_RENYI,
+                                               num_links,
+                                               GNUNET_TESTBED_TOPOLOGY_RETRY_CNT,
+                                               (unsigned int) 0,
+                                               GNUNET_TESTBED_TOPOLOGY_OPTION_END);
+  if (NULL != topology_op)
+    return;
+
+  GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+              "Cannot create topology, op handle was NULL\n");
+  GNUNET_assert (0);
+}
+
+
+/**
+ * Called when a peer start operation has finished. Releases the operation
+ * and, once all peers have been started, collects the peer handles and
+ * schedules the overlay topology configuration. A failed start aborts the
+ * profiler.
+ *
+ * @param cls the 'struct DLLOperation' of the finished operation (closure
+ *          from GNUNET_TESTBED_peer_start/stop())
+ * @param emsg NULL on success; otherwise an error description
+ */
+static void
+peer_churn_cb (void *cls, const char *emsg)
+{
+  static unsigned int started_peers;
+  struct DLLOperation *entry = cls;
+  struct GNUNET_TESTBED_Operation *finished_op = entry->op;
+  unsigned int idx;
+
+  /* the list entry is no longer needed, only the operation itself */
+  GNUNET_CONTAINER_DLL_remove (dll_op_head, dll_op_tail, entry);
+  GNUNET_free (entry);
+
+  if (NULL != emsg)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                _("An operation has failed while starting peers\n"));
+    GNUNET_TESTBED_operation_done (finished_op);
+    if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+      GNUNET_SCHEDULER_cancel (abort_task);
+    abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+    return;
+  }
+
+  GNUNET_TESTBED_operation_done (finished_op);
+  started_peers++;
+  if (started_peers != num_peers)
+    return;
+
+  prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time);
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "All peers started successfully in %s\n",
+              GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO));
+  result = GNUNET_OK;
+
+  /* flat array of peer handles as needed by the topology configuration */
+  peer_handles = GNUNET_malloc (sizeof (struct GNUNET_TESTBED_Peer *) * num_peers);
+  idx = 0;
+  while (idx < num_peers)
+  {
+    peer_handles[idx] = peers[idx].peer_handle;
+    idx++;
+  }
+
+  state = STATE_PEERS_LINKING;
+  GNUNET_SCHEDULER_add_now (&do_configure_topology, NULL);
+}
+
+
+/**
+ * Functions of this signature are called when a peer has been successfully
+ * created. Stores the peer handle in the 'struct RegexPeer' and, once all
+ * peers are created, starts all of them. A failed creation aborts the
+ * profiler.
+ *
+ * @param cls the 'struct DLLOperation' of the create operation (closure from
+ *          GNUNET_TESTBED_peer_create()); its 'cls' member is the
+ *          'struct RegexPeer' being created
+ * @param peer the handle for the created peer; NULL on any error during
+ *          creation
+ * @param emsg NULL if peer is not NULL; else MAY contain the error description
+ */
+static void
+peer_create_cb (void *cls, struct GNUNET_TESTBED_Peer *peer, const char *emsg)
+{
+  struct DLLOperation *dll_op = cls;
+  struct RegexPeer *peer_ptr;
+  static unsigned int created_peers;
+  unsigned int peer_cnt;
+
+  /* An error is signalled by a NULL peer; the error message is optional, so
+     checking emsg alone would miss failures without a description */
+  if ((NULL == peer) || (NULL != emsg))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                _("Creating a peer failed. Error: %s\n"),
+                (NULL != emsg) ? emsg : "no error message available");
+    GNUNET_TESTBED_operation_done (dll_op->op);
+    GNUNET_CONTAINER_DLL_remove (dll_op_head, dll_op_tail, dll_op);
+    GNUNET_free (dll_op);
+    if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+      GNUNET_SCHEDULER_cancel (abort_task);
+    abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+    return;
+  }
+
+  peer_ptr = dll_op->cls;
+  GNUNET_assert (NULL == peer_ptr->peer_handle);
+  /* the per-peer configuration was only needed to create the peer */
+  GNUNET_CONFIGURATION_destroy (peer_ptr->cfg);
+  peer_ptr->cfg = NULL;
+  peer_ptr->peer_handle = peer;
+  GNUNET_TESTBED_operation_done (dll_op->op);
+  GNUNET_CONTAINER_DLL_remove (dll_op_head, dll_op_tail, dll_op);
+  GNUNET_free (dll_op);
+
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Peer %i created on host %s\n",
+              peer_ptr->id,
+              GNUNET_TESTBED_host_get_hostname (peer_ptr->host_handle));
+
+  if (++created_peers == num_peers)
+  {
+    prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time);
+    GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+                "All peers created successfully in %s\n",
+                GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO));
+    /* Now peers are to be started */
+    state = STATE_PEERS_STARTING;
+    prof_start_time = GNUNET_TIME_absolute_get ();
+    for (peer_cnt = 0; peer_cnt < num_peers; peer_cnt++)
+    {
+      dll_op = GNUNET_malloc (sizeof (struct DLLOperation));
+      dll_op->op = GNUNET_TESTBED_peer_start (dll_op, peers[peer_cnt].peer_handle,
+                                              &peer_churn_cb, dll_op);
+      GNUNET_CONTAINER_DLL_insert_tail (dll_op_head, dll_op_tail, dll_op);
+    }
+  }
+}
+
+
+/**
+ * Function called with a filename for each file in the policy directory. Create
+ * a peer for each filename and update the peer's configuration to include the
+ * max_path_compression specified as a command line argument as well as the
+ * policy_file for this peer. The gnunet-service-regexprofiler service is
+ * automatically started on this peer. The service reads the configuration and
+ * announces the regexes stored in the policy file 'filename'.
+ *
+ * The n-th invocation fills the n-th entry of the global 'peers' array, so
+ * the function must not be called more often than 'num_peers' times.
+ *
+ * @param cls closure
+ * @param filename complete filename (absolute path)
+ * @return GNUNET_OK to continue to iterate,
+ *  GNUNET_SYSERR to abort iteration with error!
+ */
+static int
+policy_filename_cb (void *cls, const char *filename)
+{
+  static unsigned int peer_cnt;
+  struct DLLOperation *dll_op;
+  struct RegexPeer *peer;
+
+  /* The peers array was sized by an earlier scan of the directory; if more
+     files show up now, stop instead of writing behind the array */
+  if (peer_cnt >= num_peers)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                "More policy files than peers (%u), ignoring file %s\n",
+                num_peers, filename);
+    return GNUNET_SYSERR;
+  }
+  peer = &peers[peer_cnt];
+
+  peer->policy_file = GNUNET_strdup (filename);
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Creating peer %u on host %s for policy file %s\n",
+              peer->id,
+              GNUNET_TESTBED_host_get_hostname (peer->host_handle),
+              filename);
+
+  /* Set configuration options specific for this peer
+     (max_path_compression and policy_file) */
+  peer->cfg = GNUNET_CONFIGURATION_dup (cfg);
+  GNUNET_CONFIGURATION_set_value_number (peer->cfg, "REGEXPROFILER",
+                                         "MAX_PATH_COMPRESSION",
+                                         (unsigned long long)max_path_compression);
+  GNUNET_CONFIGURATION_set_value_string (peer->cfg, "REGEXPROFILER",
+                                         "POLICY_FILE", filename);
+
+  dll_op = GNUNET_malloc (sizeof (struct DLLOperation));
+  dll_op->cls = peer;
+  dll_op->op = GNUNET_TESTBED_peer_create (mc,
+                                           peer->host_handle,
+                                           peer->cfg,
+                                           &peer_create_cb,
+                                           dll_op);
+  GNUNET_CONTAINER_DLL_insert_tail (dll_op_head, dll_op_tail, dll_op);
+
+  peer_cnt++;
+
+  return GNUNET_OK;
+}
+
+
+/**
+ * Controller event callback. Dispatches on the global profiler 'state':
+ * while slaves are starting it counts finished link operations and, when
+ * all slaves are up, creates the peers from the policy directory; while
+ * peers are starting it ignores the events (handled in peer_churn_cb);
+ * while peers are linking it counts links and, when all are accounted for,
+ * schedules the string search. Events in any other state except
+ * STATE_SEARCH_REGEX are treated as fatal.
+ *
+ * @param cls NULL
+ * @param event the controller event
+ */
+static void
+controller_event_cb (void *cls,
+ const struct GNUNET_TESTBED_EventInformation *event)
+{
+ struct DLLOperation *dll_op;
+ struct GNUNET_TESTBED_Operation *op;
+ int ret;
+
+ switch (state)
+ {
+ case STATE_SLAVES_STARTING:
+ switch (event->type)
+ {
+ case GNUNET_TESTBED_ET_OPERATION_FINISHED:
+ {
+ static unsigned int slaves_started;
+ unsigned int peer_cnt;
+
+ dll_op = event->details.operation_finished.op_cls;
+ GNUNET_CONTAINER_DLL_remove (dll_op_head, dll_op_tail, dll_op);
+ GNUNET_free (dll_op);
+ op = event->details.operation_finished.operation;
+ if (NULL != event->details.operation_finished.emsg)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ _("An operation has failed while starting slaves\n"));
+ GNUNET_TESTBED_operation_done (op);
+ if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+ GNUNET_SCHEDULER_cancel (abort_task);
+ abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+ return;
+ }
+ GNUNET_TESTBED_operation_done (op);
+ /* Proceed to start peers */
+ if (++slaves_started == num_hosts - 1) /* hosts[0] is the master, all other hosts are slaves */
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+ "All slaves started successfully\n");
+
+ state = STATE_PEERS_CREATING;
+ prof_start_time = GNUNET_TIME_absolute_get ();
+
+ if (-1 == (ret = GNUNET_DISK_directory_scan (policy_dir, /* first scan has no callback: its result is only used as the number of peers */
+ NULL,
+ NULL)))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ _("No files found in `%s'\n"),
+ policy_dir);
+ GNUNET_SCHEDULER_shutdown ();
+ return;
+ }
+ num_peers = (unsigned int) ret; /* NOTE(review): a result of 0 passes the check above -- confirm an empty directory is handled */
+ peers = GNUNET_malloc (sizeof (struct RegexPeer) * num_peers);
+
+ /* Initialize peers */
+ for (peer_cnt = 0; peer_cnt < num_peers; peer_cnt++)
+ {
+ struct RegexPeer *peer = &peers[peer_cnt];
+ peer->id = peer_cnt;
+ peer->policy_file = NULL;
+ /* Do not start peers on hosts[0] (master controller); peers are
+ distributed round-robin over the remaining hosts */
+ peer->host_handle = hosts[1 + (peer_cnt % (num_hosts -1))];
+ peer->dht_handle = NULL;
+ peer->search_handle = NULL;
+ peer->stats_handle = NULL;
+ peer->stats_op_handle = NULL;
+ peer->search_str = NULL;
+ peer->search_str_matched = GNUNET_NO;
+ }
+
+ GNUNET_DISK_directory_scan (policy_dir, /* second scan creates one peer per file */
+ &policy_filename_cb,
+ NULL);
+ }
+ }
+ break;
+ default:
+ GNUNET_assert (0);
+ }
+ break;
+ case STATE_PEERS_STARTING:
+ switch (event->type)
+ {
+ case GNUNET_TESTBED_ET_OPERATION_FINISHED:
+ /* Control reaches here when peer start fails */
+ case GNUNET_TESTBED_ET_PEER_START:
+ /* we handle peer starts in peer_churn_cb */
+ break;
+ default:
+ GNUNET_assert (0);
+ }
+ break;
+ case STATE_PEERS_LINKING:
+ switch (event->type)
+ {
+ static unsigned int established_links;
+ case GNUNET_TESTBED_ET_OPERATION_FINISHED:
+ /* Control reaches here when a peer linking operation fails */
+ if (NULL != event->details.operation_finished.emsg)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ _("An operation has failed while linking\n"));
+ printf ("F");
+ fflush (stdout);
+ retry_links++; /* NOTE(review): the report below prints cont_fails, not retry_links -- confirm which counter is intended */
+ }
+ /* We do no retries, consider this link as established */
+ /* intentional fall through into the CONNECT case (was: break;) */
+ case GNUNET_TESTBED_ET_CONNECT:
+ {
+ char output_buffer[512];
+ size_t size;
+
+ if (0 == established_links)
+ printf ("Establishing links .");
+ else
+ {
+ printf (".");
+ fflush (stdout);
+ }
+ if (++established_links == num_links)
+ {
+ fflush (stdout);
+ prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time);
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+ "%u links established in %s\n",
+ num_links,
+ GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO));
+ result = GNUNET_OK;
+ GNUNET_free (peer_handles); /* NOTE(review): peer_handles is not reset to NULL here -- confirm it is not used or freed again */
+
+ if (NULL != data_file)
+ {
+ size =
+ GNUNET_snprintf (output_buffer,
+ sizeof (output_buffer),
+ "# of peers: %u\n# of links established: %u\n"
+ "Time to establish links: %s\nLinking failures: %u\n"
+ "path compression length: %u\n# of search strings: %u\n",
+ num_peers,
+ (established_links - cont_fails),
+ GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO),
+ cont_fails,
+ max_path_compression,
+ num_search_strings);
+
+ if (size != GNUNET_DISK_file_write (data_file, output_buffer, size))
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n");
+ }
+
+ printf ("\nWaiting %s before starting to search.\n",
+ GNUNET_STRINGS_relative_time_to_string (search_delay, GNUNET_YES));
+ fflush (stdout);
+
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+ "Waiting %s before starting to search.\n",
+ GNUNET_STRINGS_relative_time_to_string (search_delay, GNUNET_NO));
+
+ state = STATE_SEARCH_REGEX;
+
+ search_task = GNUNET_SCHEDULER_add_delayed (search_delay,
+ &do_connect_by_string, NULL);
+ }
+ }
+ break;
+ default:
+ GNUNET_assert (0);
+ }
+ break;
+ case STATE_SEARCH_REGEX:
+ {
+ /* Handled in service connect callback */
+ break;
+ }
+ default:
+ switch (state)
+ {
+ case STATE_PEERS_CREATING:
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Failed to create peer\n");
+ break;
+ default:
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Unexpected controller_cb with state %i!\n", state);
+ }
+ GNUNET_assert (0); /* any event in an unhandled state is fatal */
+ }
+}
+
+
+/**
+ * Task to register all hosts available in the global host list.
+ * Forward declaration: host_registration_completion() re-schedules this
+ * task before its definition is visible.
+ *
+ * @param cls NULL
+ * @param tc the scheduler task context
+ */
+static void
+register_hosts (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc);
+
+
+/**
+ * Callback which will be called after a host registration succeeded or
+ * failed. On success the registration task is scheduled again to handle
+ * the next host; on failure the profiler is aborted.
+ *
+ * @param cls the closure
+ * @param emsg the error message; NULL if host registration is successful
+ */
+static void
+host_registration_completion (void *cls, const char *emsg)
+{
+  /* the registration has completed, its handle is no longer valid */
+  reg_handle = NULL;
+
+  if (NULL == emsg)
+  {
+    register_hosts_task = GNUNET_SCHEDULER_add_now (&register_hosts, NULL);
+    return;
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+              _("Host registration failed for a host. Error: %s\n"), emsg);
+  if (abort_task != GNUNET_SCHEDULER_NO_TASK)
+  {
+    GNUNET_SCHEDULER_cancel (abort_task);
+  }
+  abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+}
+
+
+/**
+ * Task to register the hosts from the global host list with the master
+ * controller, one host per invocation (hosts[0] itself is not registered).
+ * Once the last host has been registered, a controller link operation is
+ * started for every slave host.
+ *
+ * @param cls NULL
+ * @param tc the scheduler task context
+ */
+static void
+register_hosts (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  static unsigned int reg_host;
+  struct DLLOperation *link_op;
+  unsigned int slave;
+
+  register_hosts_task = GNUNET_SCHEDULER_NO_TASK;
+
+  if (reg_host != num_hosts - 1)
+  {
+    /* still hosts left: register the next one and wait for the callback */
+    reg_host++;
+    reg_handle = GNUNET_TESTBED_register_host (mc, hosts[reg_host],
+                                               host_registration_completion,
+                                               NULL);
+    return;
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO,
+              "All hosts successfully registered\n");
+  /* Start slaves */
+  state = STATE_SLAVES_STARTING;
+  slave = 1;
+  while (slave < num_hosts)
+  {
+    link_op = GNUNET_malloc (sizeof (struct DLLOperation));
+    link_op->op = GNUNET_TESTBED_controller_link (link_op,
+                                                  mc,
+                                                  hosts[slave],
+                                                  hosts[0],
+                                                  cfg,
+                                                  GNUNET_YES);
+    GNUNET_CONTAINER_DLL_insert_tail (dll_op_head, dll_op_tail, link_op);
+    slave++;
+  }
+}
+
+
+/**
+ * Callback to signal successful startup of the controller process. On
+ * success it connects to the master controller, schedules the host
+ * registration and re-arms the abort task; on failure it aborts the
+ * profiler.
+ *
+ * @param cls the closure from GNUNET_TESTBED_controller_start()
+ * @param config the configuration with which the controller has been started;
+ *          NULL if status is not GNUNET_OK
+ * @param status GNUNET_OK if the startup is successful; GNUNET_SYSERR if not,
+ *          GNUNET_TESTBED_controller_stop() shouldn't be called in this case
+ */
+static void
+status_cb (void *cls, const struct GNUNET_CONFIGURATION_Handle *config, int status)
+{
+  /* every path below installs a new abort task */
+  if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+    GNUNET_SCHEDULER_cancel (abort_task);
+  if (GNUNET_OK != status)
+  {
+    /* the controller must not be stopped in this case, forget the handle */
+    mc_proc = NULL;
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _("Failed to start the master controller\n"));
+    abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+    return;
+  }
+  event_mask = 0;
+  event_mask |= (1LL << GNUNET_TESTBED_ET_PEER_START);
+  event_mask |= (1LL << GNUNET_TESTBED_ET_PEER_STOP);
+  event_mask |= (1LL << GNUNET_TESTBED_ET_CONNECT);
+  event_mask |= (1LL << GNUNET_TESTBED_ET_DISCONNECT);
+  event_mask |= (1LL << GNUNET_TESTBED_ET_OPERATION_FINISHED);
+  mc = GNUNET_TESTBED_controller_connect (config, hosts[0], event_mask,
+                                          &controller_event_cb, NULL);
+  if (NULL == mc)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                _("Unable to connect to master controller -- Check config\n"));
+    abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+    return;
+  }
+  register_hosts_task = GNUNET_SCHEDULER_add_now (&register_hosts, NULL);
+  abort_task = GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL,
+                                             &do_abort, (void*) __LINE__);
+}
+
+
+/**
+ * Load search strings from given filename. One search string per line;
+ * empty lines are skipped and a final line without a trailing newline is
+ * read as well. Every string is prefixed with the global 'regex_prefix'.
+ *
+ * @param filename filename of the file containing the search strings.
+ * @param strings set of strings loaded from file. Caller needs to free this
+ *                if number returned is greater than zero; set to NULL if
+ *                zero is returned.
+ * @param limit upper limit on the number of strings read from the file
+ * @return number of strings found in the file. GNUNET_SYSERR on error.
+ */
+static int
+load_search_strings (const char *filename, char ***strings, unsigned int limit)
+{
+  char *data;
+  uint64_t filesize;
+  uint64_t pos;
+  uint64_t line_start;
+  unsigned int str_cnt;
+  unsigned int i;
+
+  if (NULL == filename)
+  {
+    return GNUNET_SYSERR;
+  }
+
+  if (GNUNET_YES != GNUNET_DISK_file_test (filename))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                "Could not find search strings file %s\n", filename);
+    return GNUNET_SYSERR;
+  }
+  if (GNUNET_OK != GNUNET_DISK_file_size (filename, &filesize, GNUNET_YES, GNUNET_YES))
+    filesize = 0;
+  if (0 == filesize)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Search strings file %s is empty.\n", filename);
+    return GNUNET_SYSERR;
+  }
+  /* one extra byte so that the buffer always ends with a terminator, even
+     if the last line of the file has no trailing newline */
+  data = GNUNET_malloc (filesize + 1);
+  if (filesize != GNUNET_DISK_fn_read (filename, data, filesize))
+  {
+    GNUNET_free (data);
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not read search strings file %s.\n",
+                filename);
+    return GNUNET_SYSERR;
+  }
+  data[filesize] = '\0';
+
+  /* First pass: turn every line break into a terminator and count the
+     non-empty lines, stopping as soon as 'limit' strings were found */
+  str_cnt = 0;
+  line_start = 0;
+  for (pos = 0; (pos <= filesize) && (str_cnt < limit); pos++)
+  {
+    if (('\n' != data[pos]) && ('\0' != data[pos]))
+      continue;
+    data[pos] = '\0';
+    if (pos > line_start)
+      str_cnt++;
+    line_start = pos + 1;
+  }
+  if (0 == str_cnt)
+  {
+    /* nothing usable in the file; do not hand out an empty allocation */
+    GNUNET_free (data);
+    *strings = NULL;
+    return 0;
+  }
+
+  /* Second pass: copy the counted strings, skipping the terminators left
+     behind by empty lines */
+  *strings = GNUNET_malloc (sizeof (char *) * str_cnt);
+  pos = 0;
+  for (i = 0; i < str_cnt; i++)
+  {
+    while ('\0' == data[pos])
+      pos++;
+    GNUNET_asprintf (&(*strings)[i], "%s%s", regex_prefix, &data[pos]);
+    pos += strlen (&data[pos]) + 1;
+  }
+  GNUNET_free (data);
+  return (int) str_cnt;
+}
+
+
+/**
+ * Callbacks of this type are called by GNUNET_TESTBED_is_host_habitable to
+ * inform whether the given host is habitable or not. The Handle returned by
+ * GNUNET_TESTBED_is_host_habitable() is invalid after this callback is called.
+ * Once all hosts have been reported habitable, the master controller is
+ * started on hosts[0]; a host that is not habitable aborts the profiler.
+ *
+ * @param cls location of the check handle belonging to this host; it is
+ *          cleared because the handle is no longer valid
+ * @param host the host whose status is being reported; will be NULL if the host
+ *          given to GNUNET_TESTBED_is_host_habitable() is NULL
+ * @param status GNUNET_YES if it is habitable; GNUNET_NO if not
+ */
+static void
+host_habitable_cb (void *cls, const struct GNUNET_TESTBED_Host *host, int status)
+{
+  struct GNUNET_TESTBED_HostHabitableCheckHandle **hc_handle = cls;
+  static unsigned int hosts_checked;
+
+  *hc_handle = NULL;
+  if (GNUNET_NO == status)
+  {
+    if ((NULL != host) && (NULL != GNUNET_TESTBED_host_get_hostname (host)))
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR, _("Host %s cannot start testbed\n"),
+                  GNUNET_TESTBED_host_get_hostname (host));
+    else
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR, _("Testbed cannot be started on localhost\n"));
+    /* only cancel an abort task that is actually pending, as everywhere
+       else in this file */
+    if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+      GNUNET_SCHEDULER_cancel (abort_task);
+    abort_task = GNUNET_SCHEDULER_add_now (&do_abort, (void*) __LINE__);
+    return;
+  }
+  hosts_checked++;
+  /* wait until every host has reported back */
+  if (hosts_checked < num_hosts)
+    return;
+  /* all checks are done, the handle array is no longer needed */
+  GNUNET_free (hc_handles);
+  hc_handles = NULL;
+  mc_proc =
+    GNUNET_TESTBED_controller_start (GNUNET_TESTBED_host_get_hostname
+                                     (hosts[0]),
+                                     hosts[0],
+                                     cfg,
+                                     status_cb,
+                                     NULL);
+}
+
+
+/**
+ * Main function that will be run by the scheduler.
+ *
+ * Expects three positional arguments: the hosts file, the policy directory
+ * and the search strings file.  Loads the hosts, starts one habitability
+ * check per host, then validates the configuration and the remaining
+ * arguments, and finally arms a 5 second abort timeout.
+ *
+ * @param cls closure
+ * @param args remaining command-line arguments
+ * @param cfgfile name of the configuration file used (for saving, can be NULL!)
+ * @param config configuration
+ */
+static void
+run (void *cls, char *const *args, const char *cfgfile,
+     const struct GNUNET_CONFIGURATION_Handle *config)
+{
+  unsigned int nhost;
+  unsigned int cleanup;
+  unsigned int nsearchstrs;
+  unsigned int i;
+
+  if (NULL == args[0])
+  {
+    fprintf (stderr, _("No hosts-file specified on command line. Exiting.\n"));
+    return;
+  }
+  if (NULL == args[1])
+  {
+    fprintf (stderr, _("No policy directory specified on command line. Exiting.\n"));
+    return;
+  }
+  num_hosts = GNUNET_TESTBED_hosts_load_from_file (args[0], &hosts);
+  if (0 == num_hosts)
+  {
+    fprintf (stderr, _("No hosts loaded. Need at least one host\n"));
+    return;
+  }
+  /* The configuration is handed to the habitability checks below, so it
+   * must be validated before they are started. */
+  if (NULL == config)
+  {
+    fprintf (stderr, _("No configuration file given. Exiting\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+  printf (_("Checking whether given hosts can start testbed. Please wait\n"));
+  hc_handles = GNUNET_malloc (sizeof (struct
+                                      GNUNET_TESTBED_HostHabitableCheckHandle *)
+                              * num_hosts);
+  for (nhost = 0; nhost < num_hosts; nhost++)
+  {
+    hc_handles[nhost] = GNUNET_TESTBED_is_host_habitable (hosts[nhost], config,
+                                                          &host_habitable_cb,
+                                                          &hc_handles[nhost]);
+    if (NULL != hc_handles[nhost])
+      continue;
+    GNUNET_break (0);
+    /* Cancel every check that is still pending.  A separate index is used
+     * so that nhost stays below num_hosts and the failure is detected
+     * after the loop. */
+    for (cleanup = 0; cleanup < num_hosts; cleanup++)
+      if (NULL != hc_handles[cleanup])
+        GNUNET_TESTBED_is_host_habitable_cancel (hc_handles[cleanup]);
+    GNUNET_free (hc_handles);
+    hc_handles = NULL;
+    break;
+  }
+  if (num_hosts != nhost)
+  {
+    fprintf (stderr, _("Exiting\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_string (config, "REGEXPROFILER", "REGEX_PREFIX",
+                                             &regex_prefix))
+  {
+    fprintf (stderr, _("Configuration option (regex_prefix) missing. Exiting\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+
+  /* The statistics file is optional: failing to open it is logged, but the
+   * run continues without it. */
+  if ( (NULL != data_filename) &&
+       (NULL == (data_file =
+                 GNUNET_DISK_file_open (data_filename,
+                                        GNUNET_DISK_OPEN_READWRITE |
+                                        GNUNET_DISK_OPEN_TRUNCATE |
+                                        GNUNET_DISK_OPEN_CREATE,
+                                        GNUNET_DISK_PERM_USER_READ |
+                                        GNUNET_DISK_PERM_USER_WRITE))) )
+    GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_ERROR,
+                              "open",
+                              data_filename);
+  if (GNUNET_YES != GNUNET_DISK_directory_test (args[1], GNUNET_YES))
+  {
+    fprintf (stderr, _("Specified policies directory does not exist. Exiting.\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+  policy_dir = args[1];
+  if ( (NULL == args[2]) ||
+       (GNUNET_YES != GNUNET_DISK_file_test (args[2])) )
+  {
+    fprintf (stderr, _("No search strings file given. Exiting.\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+  nsearchstrs = load_search_strings (args[2], &search_strings, num_search_strings);
+  if (num_search_strings != nsearchstrs)
+  {
+    /* Remember how many strings were actually loaded. */
+    num_search_strings = nsearchstrs;
+    fprintf (stderr, _("Error loading search strings. Given file does not contain enough strings. Exiting.\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+  if ( (0 == num_search_strings) || (NULL == search_strings) )
+  {
+    fprintf (stderr, _("Error loading search strings. Exiting.\n"));
+    shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+    return;
+  }
+  for (i = 0; i < num_search_strings; i++)
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "search string: %s\n", search_strings[i]);
+  cfg = GNUNET_CONFIGURATION_dup (config);
+  abort_task =
+    GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_relative_multiply
+                                  (GNUNET_TIME_UNIT_SECONDS, 5), &do_abort,
+                                  (void*) __LINE__);
+}
+
+
+/**
+ * Program entry point: converts the arguments to UTF-8, registers the
+ * command-line options and hands control to run() via GNUNET_PROGRAM_run().
+ *
+ * @param argc argument count
+ * @param argv argument values
+ * @return 0 on success, 2 if the arguments could not be converted, the
+ *         GNUNET_PROGRAM_run() status if that is not GNUNET_OK, 1 if the
+ *         profiler itself reported failure through the global result
+ */
+int
+main (int argc, char *const *argv)
+{
+  static const struct GNUNET_GETOPT_CommandLineOption options[] = {
+    { 'd', "details", "FILENAME",
+      gettext_noop ("name of the file for writing statistics"),
+      1, &GNUNET_GETOPT_set_string, &data_filename },
+    { 'n', "num-links", "COUNT",
+      gettext_noop ("create COUNT number of random links between peers"),
+      GNUNET_YES, &GNUNET_GETOPT_set_uint, &linking_factor },
+    { 't', "matching-timeout", "TIMEOUT",
+      gettext_noop ("wait TIMEOUT before considering a string match as failed"),
+      GNUNET_YES, &GNUNET_GETOPT_set_relative_time, &search_timeout },
+    { 's', "search-delay", "DELAY",
+      gettext_noop ("wait DELAY before starting string search"),
+      GNUNET_YES, &GNUNET_GETOPT_set_relative_time, &search_delay },
+    { 'a', "num-search-strings", "COUNT",
+      gettext_noop ("number of search strings to read from search strings file"),
+      GNUNET_YES, &GNUNET_GETOPT_set_uint, &num_search_strings },
+    { 'p', "max-path-compression", "MAX_PATH_COMPRESSION",
+      gettext_noop ("maximum path compression length"),
+      1, &GNUNET_GETOPT_set_uint, &max_path_compression },
+    { 'i', "no-distributed-search", "",
+      gettext_noop ("if this option is set, only one peer is responsible for searching all strings"),
+      0, &GNUNET_GETOPT_set_one, &no_distributed_search },
+    GNUNET_GETOPT_OPTION_END
+  };
+  int status;
+
+  if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv))
+    return 2;
+
+  /* Assume failure until the profiler explicitly reports success. */
+  result = GNUNET_SYSERR;
+  status =
+    GNUNET_PROGRAM_run (argc, argv,
+                        "gnunet-regex-profiler [OPTIONS] hosts-file policy-dir search-strings-file",
+                        _("Profiler for regex"),
+                        options, &run, NULL);
+
+  if (GNUNET_OK != status)
+    return status;
+  return (GNUNET_OK == result) ? 0 : 1;
+}
diff --git a/src/regex/gnunet-regex-simulation-profiler.c b/src/regex/gnunet-regex-simulation-profiler.c
new file mode 100644
index 0000000..32b09ea
--- /dev/null
+++ b/src/regex/gnunet-regex-simulation-profiler.c
@@ -0,0 +1,711 @@
+/*
+ This file is part of GNUnet.
+ (C) 2011, 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @file regex/gnunet-regex-simulation-profiler.c
+ * @brief Regex profiler that dumps all DFAs into a database instead of
+ * using the DHT (with mesh).
+ * @author Maximilian Szengel
+ *
+ */
+
+#include "platform.h"
+#include "gnunet_util_lib.h"
+#include "gnunet_regex_lib.h"
+#include "gnunet_mysql_lib.h"
+#include <mysql/mysql.h>
+
+/**
+ * MySQL statement to insert an edge.
+ */
+#define INSERT_EDGE_STMT "INSERT IGNORE INTO `%s` "\
+ "(`key`, `label`, `to_key`, `accepting`) "\
+ "VALUES (?, ?, ?, ?);"
+
+/**
+ * MySQL statement to select a key count.
+ */
+#define SELECT_KEY_STMT "SELECT COUNT(*) FROM `%s` "\
+ "WHERE `key` = ? AND `label` = ?;"
+
+/**
+ * Simple struct to keep track of progress, and print a
+ * nice little percentage meter for long running tasks.
+ */
+struct ProgressMeter
+{
+ /**
+ * Total number of elements.
+ */
+ unsigned int total;
+
+ /**
+ * Interval (in completed elements) at which the percentage is printed.
+ */
+ unsigned int modnum;
+
+ /**
+ * Interval (in completed elements) at which a progress dot is printed.
+ */
+ unsigned int dotnum;
+
+ /**
+ * Number of elements completed so far.
+ */
+ unsigned int completed;
+
+ /**
+ * Should the meter be printed? (GNUNET_YES to print)
+ */
+ int print;
+
+ /**
+ * String to print on startup, in front of the meter.
+ */
+ char *startup_string;
+};
+
+
+/**
+ * Handle for the progress meter; NULL while no meter exists.
+ */
+static struct ProgressMeter *meter;
+
+/**
+ * Abort task identifier.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier abort_task;
+
+/**
+ * Shutdown task identifier.
+ */
+static GNUNET_SCHEDULER_TaskIdentifier shutdown_task;
+
+/**
+ * Scan task identifier (the task running do_directory_scan).
+ */
+static GNUNET_SCHEDULER_TaskIdentifier scan_task;
+
+/**
+ * Global testing status; main() turns anything but GNUNET_OK into exit code 1.
+ */
+static int result;
+
+/**
+ * MySQL context.
+ */
+static struct GNUNET_MYSQL_Context *mysql_ctx;
+
+/**
+ * MySQL prepared statement handle for the edge insert (INSERT_EDGE_STMT).
+ */
+static struct GNUNET_MYSQL_StatementHandle *stmt_handle;
+
+/**
+ * MySQL prepared statement handle for `key` select (SELECT_KEY_STMT).
+ */
+static struct GNUNET_MYSQL_StatementHandle *select_stmt_handle;
+
+/**
+ * MySQL table name (option -t; defaults to "NFA").
+ */
+static char *table_name;
+
+/**
+ * Policy dir containing files that contain policies.
+ */
+static char *policy_dir;
+
+/**
+ * Number of policy files.
+ */
+static unsigned int num_policy_files;
+
+/**
+ * Number of policies.
+ */
+static unsigned int num_policies;
+
+/**
+ * Maximal path compression length.
+ */
+static unsigned int max_path_compression;
+
+/**
+ * Number of merged transitions.
+ */
+static unsigned long long num_merged_transitions;
+
+/**
+ * Number of merged states from different policies.
+ */
+static unsigned long long num_merged_states;
+
+/**
+ * Prefix to add before every regex we're announcing.
+ */
+static char *regex_prefix;
+
+
+/**
+ * Allocate and initialise a progress meter.
+ *
+ * The percentage is printed every quarter of @a total (at least every
+ * element) and a dot every (total / 50) + 1 elements.
+ *
+ * @param total the total number of items to complete
+ * @param start_string a string to prefix the meter with (if printing);
+ *          NULL is treated as the empty string
+ * @param print GNUNET_YES to print the meter, GNUNET_NO to count
+ *          internally only
+ *
+ * @return the progress meter
+ */
+static struct ProgressMeter *
+create_meter (unsigned int total, char *start_string, int print)
+{
+  struct ProgressMeter *pm = GNUNET_malloc (sizeof (struct ProgressMeter));
+  unsigned int quarter = total / 4;
+
+  pm->total = total;
+  pm->print = print;
+  /* modnum is used as a modulus later on, so it must never be zero. */
+  pm->modnum = (0 == quarter) ? 1 : quarter;
+  pm->dotnum = 1 + (total / 50);
+  pm->startup_string =
+    GNUNET_strdup ((NULL == start_string) ? "" : start_string);
+  return pm;
+}
+
+
+/**
+ * Update progress meter (increment by one), printing the percentage or a
+ * dot first if printing is enabled and the current count calls for it.
+ *
+ * The percentage is computed with integer arithmetic: going through float
+ * truncates some values (29 of 100 would print 28) and divides by zero for
+ * an empty meter.
+ *
+ * @param meter the meter to update and print info for
+ *
+ * @return GNUNET_YES if called the total requested,
+ *         GNUNET_NO if more items expected
+ */
+static int
+update_meter (struct ProgressMeter *meter)
+{
+  if (GNUNET_YES == meter->print)
+  {
+    if (0 == meter->completed % meter->modnum)
+    {
+      if (0 == meter->completed)
+      {
+        FPRINTF (stdout, "%sProgress: [0%%", meter->startup_string);
+      }
+      else
+      {
+        /* An empty meter (total == 0) is reported as fully done. */
+        int percent =
+          (0 == meter->total)
+          ? 100
+          : (int) (((unsigned long long) meter->completed * 100) /
+                   meter->total);
+
+        FPRINTF (stdout, "%d%%", percent);
+      }
+    }
+    else if (0 == meter->completed % meter->dotnum)
+    {
+      FPRINTF (stdout, "%s", ".");
+    }
+
+    /* Close the bracket when the last element is being accounted for. */
+    if (meter->completed + 1 == meter->total)
+      FPRINTF (stdout, "%d%%]\n", 100);
+    fflush (stdout);
+  }
+  meter->completed++;
+
+  if (meter->completed == meter->total)
+    return GNUNET_YES;
+  if (meter->completed > meter->total)
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Progress meter overflow!!\n");
+  return GNUNET_NO;
+}
+
+
+/**
+ * Set the completed count of a progress meter back to zero.
+ *
+ * @param meter the meter to reset
+ *
+ * @return GNUNET_YES if meter reset,
+ *         GNUNET_SYSERR if @a meter is NULL
+ */
+static int
+reset_meter (struct ProgressMeter *meter)
+{
+  if (NULL != meter)
+  {
+    meter->completed = 0;
+    return GNUNET_YES;
+  }
+  return GNUNET_SYSERR;
+}
+
+
+/**
+ * Free a progress meter together with its startup string.
+ *
+ * @param meter the meter to free
+ */
+static void
+free_meter (struct ProgressMeter *meter)
+{
+  char *label = meter->startup_string;
+
+  GNUNET_free_non_null (label);
+  GNUNET_free (meter);
+}
+
+
+/**
+ * Shutdown task: cancels a pending abort task, releases the MySQL context
+ * and the progress meter, and stops the scheduler.
+ *
+ * This function is scheduled from several places (run() with a FOREVER
+ * delay, do_directory_scan() and do_abort()), so it may be invoked more than
+ * once.  Every released resource is therefore reset, which makes a repeated
+ * invocation harmless.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_shutdown (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  shutdown_task = GNUNET_SCHEDULER_NO_TASK;
+  if (GNUNET_SCHEDULER_NO_TASK != abort_task)
+  {
+    GNUNET_SCHEDULER_cancel (abort_task);
+    abort_task = GNUNET_SCHEDULER_NO_TASK;
+  }
+  if (NULL != mysql_ctx)
+  {
+    GNUNET_MYSQL_context_destroy (mysql_ctx);
+    mysql_ctx = NULL;
+  }
+  if (NULL != meter)
+  {
+    free_meter (meter);
+    meter = NULL;
+  }
+
+  GNUNET_SCHEDULER_shutdown (); /* Stop scheduler to shutdown testbed run */
+}
+
+
+/**
+ * Abort task: marks the run as failed and schedules the shutdown.
+ *
+ * The scan task is only cancelled if one is recorded as pending; cancelling
+ * GNUNET_SCHEDULER_NO_TASK is not a valid scheduler operation.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_abort (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Aborting\n");
+  abort_task = GNUNET_SCHEDULER_NO_TASK;
+  if (GNUNET_SCHEDULER_NO_TASK != scan_task)
+  {
+    GNUNET_SCHEDULER_cancel (scan_task);
+    scan_task = GNUNET_SCHEDULER_NO_TASK;
+  }
+  result = GNUNET_SYSERR;
+  GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+}
+
+
+/**
+ * Result processor for the prepared select that ignores every argument and
+ * always reports success.
+ *
+ * @param cls closure (unused)
+ * @param num_values number of values (unused)
+ * @param values returned values from select stmt (unused)
+ *
+ * @return GNUNET_OK
+ */
+static int
+return_ok (void *cls, unsigned int num_values, MYSQL_BIND * values)
+{
+  (void) cls;
+  (void) num_values;
+  (void) values;
+  return GNUNET_OK;
+}
+
+
+/**
+ * Iterator over all states that inserts each state into the MySQL db.
+ *
+ * For every outgoing edge a row (key, label, to_key, accepting) is inserted
+ * with the prepared INSERT IGNORE statement, after the number of existing
+ * rows with the same (key, label) has been queried.  A state without edges
+ * is stored as a single row with NULL label and destination.  On database
+ * errors do_abort is scheduled.
+ *
+ * @param cls closure.
+ * @param key hash for current state.
+ * @param proof proof for current state.
+ * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
+ * @param num_edges number of edges leaving current state.
+ * @param edges edges leaving current state.
+ */
+static void
+regex_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof,
+ int accepting, unsigned int num_edges,
+ const struct GNUNET_REGEX_Edge *edges)
+{
+ unsigned int i;
+ /* NOTE(review): this local shadows the file-scope `result` status. */
+ int result;
+ unsigned long k_length;
+ unsigned long e_length;
+ unsigned long d_length;
+ MYSQL_BIND rbind[1];
+ unsigned long long total;
+
+ GNUNET_assert (NULL != mysql_ctx);
+
+ for (i = 0; i < num_edges; i++)
+ {
+ k_length = sizeof (struct GNUNET_HashCode);
+ e_length = strlen (edges[i].label);
+ d_length = sizeof (struct GNUNET_HashCode);
+ memset (rbind, 0, sizeof (rbind));
+ /* -1 converts to the largest unsigned value and serves as the
+ * "select did not store a count" sentinel tested below. */
+ total = -1;
+ rbind[0].buffer_type = MYSQL_TYPE_LONGLONG;
+ rbind[0].buffer = &total;
+ rbind[0].is_unsigned = GNUNET_YES;
+
+ /* Count the rows that already exist for this (key, label) pair. */
+ result =
+ GNUNET_MYSQL_statement_run_prepared_select (mysql_ctx,
+ select_stmt_handle, 1,
+ rbind, &return_ok, NULL,
+ MYSQL_TYPE_BLOB, key,
+ sizeof (struct
+ GNUNET_HashCode),
+ &k_length,
+ MYSQL_TYPE_STRING,
+ edges[i].label,
+ strlen (edges[i].label),
+ &e_length, -1);
+
+ if (GNUNET_SYSERR == result)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Error executing prepared mysql select statement\n");
+ GNUNET_SCHEDULER_add_now (&do_abort, NULL);
+ return;
+ }
+
+ if (-1 != total && total > 0)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Total: %llu (%s, %s)\n", total,
+ GNUNET_h2s (key), edges[i].label);
+ }
+
+ /* Insert the edge; the error message below expects 0 or 1 affected rows. */
+ result =
+ GNUNET_MYSQL_statement_run_prepared (mysql_ctx, stmt_handle, NULL,
+ MYSQL_TYPE_BLOB, key,
+ sizeof (struct GNUNET_HashCode),
+ &k_length, MYSQL_TYPE_STRING,
+ edges[i].label,
+ strlen (edges[i].label), &e_length,
+ MYSQL_TYPE_BLOB,
+ &edges[i].destination,
+ sizeof (struct GNUNET_HashCode),
+ &d_length, MYSQL_TYPE_LONG,
+ &accepting, GNUNET_YES, -1);
+
+ if (0 == result)
+ {
+ /* Both hashes are copied before logging; presumably GNUNET_h2s reuses
+ * a static buffer between calls -- TODO confirm. */
+ char *key_str = GNUNET_strdup (GNUNET_h2s (key));
+ char *to_key_str = GNUNET_strdup (GNUNET_h2s (&edges[i].destination));
+
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Merged (%s, %s, %s, %i)\n", key_str,
+ edges[i].label, to_key_str, accepting);
+ GNUNET_free (key_str);
+ GNUNET_free (to_key_str);
+ num_merged_transitions++;
+ }
+ /* NOTE(review): this branch is also taken when the insert failed and
+ * when the select counted 0 rows -- confirm this is the intended
+ * definition of a "merged state". */
+ else if (-1 != total)
+ {
+ num_merged_states++;
+ }
+
+ /* NOTE(review): unlike the select error path above, this one does not
+ * return, so the loop continues with the next edge. */
+ if (GNUNET_SYSERR == result || (1 != result && 0 != result))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Error executing prepared mysql statement for edge: Affected rows: %i, expected 0 or 1!\n",
+ result);
+ GNUNET_SCHEDULER_add_now (&do_abort, NULL);
+ }
+ }
+
+ /* A state without outgoing edges is stored with NULL label/destination. */
+ if (0 == num_edges)
+ {
+ k_length = sizeof (struct GNUNET_HashCode);
+ e_length = 0;
+ d_length = 0;
+
+ result =
+ GNUNET_MYSQL_statement_run_prepared (mysql_ctx, stmt_handle, NULL,
+ MYSQL_TYPE_BLOB, key,
+ sizeof (struct GNUNET_HashCode),
+ &k_length, MYSQL_TYPE_STRING, NULL,
+ 0, &e_length, MYSQL_TYPE_BLOB,
+ NULL, 0, &d_length,
+ MYSQL_TYPE_LONG, &accepting,
+ GNUNET_YES, -1);
+
+ if (1 != result && 0 != result)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Error executing prepared mysql statement for edge: Affected rows: %i, expected 0 or 1!\n",
+ result);
+ GNUNET_SCHEDULER_add_now (&do_abort, NULL);
+ }
+ }
+}
+
+
+/**
+ * Build the DFA for @a regex and feed every state and edge to
+ * regex_iterator(), which stores them in the MySQL database.  If the DFA
+ * cannot be constructed the abort task is scheduled.
+ *
+ * @param regex regular expression.
+ * @return GNUNET_OK on success, GNUNET_SYSERR on failure.
+ */
+static int
+announce_regex (const char *regex)
+{
+  struct GNUNET_REGEX_Automaton *automaton =
+    GNUNET_REGEX_construct_dfa (regex, strlen (regex), max_path_compression);
+
+  if (NULL != automaton)
+  {
+    GNUNET_REGEX_iterate_all_edges (automaton, &regex_iterator, NULL);
+    GNUNET_REGEX_automaton_destroy (automaton);
+    return GNUNET_OK;
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Failed to create DFA for regex %s\n",
+              regex);
+  abort_task = GNUNET_SCHEDULER_add_now (&do_abort, NULL);
+  return GNUNET_SYSERR;
+}
+
+
+/**
+ * Function called with the name of a policy file.
+ *
+ * Reads the file, joins its non-empty lines with '|' into one alternation,
+ * wraps it as "<regex_prefix>(<line1>|<line2>|...)" and announces the
+ * resulting regex.  Empty lines are skipped and a final line without a
+ * trailing newline is kept intact.  Problems with a single file are logged
+ * and do not stop the directory iteration.
+ *
+ * @param cls closure
+ * @param filename complete filename (absolute path)
+ * @return GNUNET_OK to continue to iterate,
+ *         GNUNET_SYSERR to abort iteration with error!
+ */
+int
+policy_filename_cb (void *cls, const char *filename)
+{
+  char *regex;
+  char *data;
+  uint64_t filesize;
+  size_t rpos;                  /* start of the line being read */
+  size_t wpos;                  /* next write position of the joined string */
+  size_t len;                   /* length of the line being read */
+  unsigned int file_policies;
+
+  GNUNET_assert (NULL != filename);
+
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Announcing regexes from file %s\n",
+              filename);
+
+  if (GNUNET_YES != GNUNET_DISK_file_test (filename))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not find policy file %s\n",
+                filename);
+    return GNUNET_OK;
+  }
+  if (GNUNET_OK !=
+      GNUNET_DISK_file_size (filename, &filesize, GNUNET_YES, GNUNET_YES))
+    filesize = 0;
+  if (0 == filesize)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Policy file %s is empty.\n",
+                filename);
+    return GNUNET_OK;
+  }
+  /* One extra byte so the buffer can always be 0-terminated. */
+  data = GNUNET_malloc (filesize + 1);
+  if (filesize != GNUNET_DISK_fn_read (filename, data, filesize))
+  {
+    GNUNET_free (data);
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not read policy file %s.\n",
+                filename);
+    return GNUNET_OK;
+  }
+  data[filesize] = '\0';
+
+  update_meter (meter);
+
+  /* Join the lines in place.  The write position never overtakes the read
+   * position, because every line that is consumed also consumes its
+   * separator. */
+  file_policies = 0;
+  wpos = 0;
+  rpos = 0;
+  while (rpos < filesize)
+  {
+    len = 0;
+    while ( (rpos + len < filesize) &&
+            ('\n' != data[rpos + len]) &&
+            ('\0' != data[rpos + len]) )
+      len++;
+    if (0 != len)
+    {
+      if (0 != wpos)
+        data[wpos++] = '|';
+      memmove (&data[wpos], &data[rpos], len);
+      wpos += len;
+      file_policies++;
+    }
+    rpos += len + 1;            /* skip the line and its separator */
+  }
+  data[wpos] = '\0';
+
+  if (0 == file_policies)
+  {
+    /* Only separators in the file: nothing to announce. */
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Policy file %s is empty.\n",
+                filename);
+    GNUNET_free (data);
+    return GNUNET_OK;
+  }
+  num_policies += file_policies;
+
+  GNUNET_asprintf (&regex, "%s(%s)", regex_prefix, data);
+  GNUNET_assert (NULL != regex);
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n", regex);
+
+  if (GNUNET_OK != announce_regex (regex))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not announce regex %s\n",
+                regex);
+  }
+  GNUNET_free (regex);
+  GNUNET_free (data);
+  return GNUNET_OK;
+}
+
+
+/**
+ * Scan task: prepares the insert and select statements, announces every
+ * policy file found in policy_dir, prints a summary and schedules the
+ * shutdown.
+ *
+ * @param cls NULL
+ * @param tc the task context
+ */
+static void
+do_directory_scan (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
+{
+  struct GNUNET_TIME_Absolute start_time;
+  struct GNUNET_TIME_Relative duration;
+  char *stmt;
+
+  /* This task is now running, so its identifier must no longer be used
+   * for cancellation. */
+  scan_task = GNUNET_SCHEDULER_NO_TASK;
+
+  /* Create an MySQL prepared statement for the inserts */
+  GNUNET_asprintf (&stmt, INSERT_EDGE_STMT, table_name);
+  stmt_handle = GNUNET_MYSQL_statement_prepare (mysql_ctx, stmt);
+  GNUNET_free (stmt);
+
+  GNUNET_asprintf (&stmt, SELECT_KEY_STMT, table_name);
+  select_stmt_handle = GNUNET_MYSQL_statement_prepare (mysql_ctx, stmt);
+  GNUNET_free (stmt);
+
+  /* regex_iterator() uses both statements, so both must be available. */
+  GNUNET_assert (NULL != stmt_handle);
+  GNUNET_assert (NULL != select_stmt_handle);
+
+  meter =
+    create_meter (num_policy_files, "Announcing policy files\n", GNUNET_YES);
+  start_time = GNUNET_TIME_absolute_get ();
+  GNUNET_DISK_directory_scan (policy_dir, &policy_filename_cb, stmt_handle);
+  duration = GNUNET_TIME_absolute_get_duration (start_time);
+  reset_meter (meter);
+  free_meter (meter);
+  meter = NULL;
+
+  printf ("Announced %u files containing %u policies in %s\n"
+          "Duplicate transitions: %llu\nMerged states: %llu\n",
+          num_policy_files, num_policies,
+          GNUNET_STRINGS_relative_time_to_string (duration, GNUNET_NO),
+          num_merged_transitions, num_merged_states);
+
+  result = GNUNET_OK;
+  shutdown_task = GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
+}
+
+
+/**
+ * Main function that will be run by the scheduler.
+ *
+ * Validates the policy directory given as first argument, counts the policy
+ * files, opens the MySQL context, reads the regex prefix from the
+ * "regex-mysql" configuration section and schedules the directory scan
+ * together with a shutdown task.
+ *
+ * @param cls closure
+ * @param args remaining command-line arguments
+ * @param cfgfile name of the configuration file used (for saving, can be NULL!)
+ * @param config configuration
+ */
+static void
+run (void *cls, char *const *args, const char *cfgfile,
+     const struct GNUNET_CONFIGURATION_Handle *config)
+{
+  if (NULL == args[0])
+  {
+    fprintf (stderr,
+             _("No policy directory specified on command line. Exiting.\n"));
+    result = GNUNET_SYSERR;
+    return;
+  }
+  if (GNUNET_YES != GNUNET_DISK_directory_test (args[0], GNUNET_YES))
+  {
+    fprintf (stderr,
+             _("Specified policies directory does not exist. Exiting.\n"));
+    result = GNUNET_SYSERR;
+    return;
+  }
+  policy_dir = args[0];
+
+  /* Scanning without a callback only counts the directory entries. */
+  num_policy_files = GNUNET_DISK_directory_scan (policy_dir, NULL, NULL);
+  meter = NULL;
+
+  if (NULL == table_name)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                "No table name specified, using default \"NFA\".\n");
+    table_name = "NFA";
+  }
+
+  mysql_ctx = GNUNET_MYSQL_context_create (config, "regex-mysql");
+  if (NULL == mysql_ctx)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Failed to create mysql context\n");
+    result = GNUNET_SYSERR;
+    return;
+  }
+
+  if (GNUNET_OK !=
+      GNUNET_CONFIGURATION_get_value_string (config, "regex-mysql",
+                                             "REGEX_PREFIX", &regex_prefix))
+  {
+    /* Name the section and option that are actually queried. */
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                _
+                ("%s service is lacking key configuration settings (%s). Exiting.\n"),
+                "regex-mysql", "REGEX_PREFIX");
+    /* No shutdown task has been scheduled yet, so the context has to be
+     * released here. */
+    GNUNET_MYSQL_context_destroy (mysql_ctx);
+    mysql_ctx = NULL;
+    result = GNUNET_SYSERR;
+    return;
+  }
+
+  result = GNUNET_OK;
+
+  scan_task = GNUNET_SCHEDULER_add_now (&do_directory_scan, NULL);
+
+  /* Scheduled the task to clean up when shutdown is called */
+  shutdown_task =
+    GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL, &do_shutdown,
+                                  NULL);
+}
+
+
+/**
+ * Program entry point: converts the arguments to UTF-8, registers the
+ * command-line options and hands control to run() via GNUNET_PROGRAM_run().
+ *
+ * @param argc argument count
+ * @param argv argument values
+ * @return 0 on success, 2 if the arguments could not be converted, the
+ *         GNUNET_PROGRAM_run() status if that is not GNUNET_OK, 1 if the
+ *         profiler itself reported failure through the global result
+ */
+int
+main (int argc, char *const *argv)
+{
+  static const struct GNUNET_GETOPT_CommandLineOption options[] = {
+    { 't', "table", "TABLENAME",
+      gettext_noop ("name of the table to write DFAs"),
+      1, &GNUNET_GETOPT_set_string, &table_name },
+    { 'p', "max-path-compression", "MAX_PATH_COMPRESSION",
+      gettext_noop ("maximum path compression length"),
+      1, &GNUNET_GETOPT_set_uint, &max_path_compression },
+    GNUNET_GETOPT_OPTION_END
+  };
+  int status;
+
+  if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv))
+    return 2;
+
+  /* Assume failure until run()/do_directory_scan() report success. */
+  result = GNUNET_SYSERR;
+  status =
+    GNUNET_PROGRAM_run (argc, argv,
+                        "gnunet-regex-simulationprofiler [OPTIONS] policy-dir",
+                        _("Profiler for regex library"), options, &run, NULL);
+  if (GNUNET_OK != status)
+    return status;
+  return (GNUNET_OK == result) ? 0 : 1;
+}
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c
new file mode 100644
index 0000000..45642db
--- /dev/null
+++ b/src/regex/perf-regex.c
@@ -0,0 +1,87 @@
+/*
+ This file is part of GNUnet.
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @file src/regex/perf-regex.c
+ * @brief Test how long it takes to create an automaton from a string regex.
+ * @author Bartlomiej Polot
+ */
+#include <regex.h>
+#include <time.h>
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "regex_test_lib.h"
+
+static const char *exe;
+
+/**
+ * Print a one-line usage message for this program to stderr.
+ */
+static void
+usage(void)
+{
+  const char *program = exe;
+
+  fprintf (stderr, "Usage: %s REGEX_FILE COMPRESSION\n", program);
+}
+
+/**
+ * Reads the regexes from REGEX_FILE, combines them into a single regex and
+ * constructs a DFA from it with the given path compression.
+ *
+ * @param argc number of arguments from the command line
+ * @param argv command line arguments
+ * @return 0 ok, 1 on invalid arguments, 2 if the regex file could not be
+ *         read, 3 if the DFA could not be constructed
+ */
+int
+main (int argc, char *const *argv)
+{
+  struct GNUNET_REGEX_Automaton* dfa;
+  char **regexes;
+  char *buffer;
+  char *regex;
+  unsigned int compression;
+  size_t size;
+  int ret;
+
+  GNUNET_log_setup ("perf-regex", "DEBUG", NULL);
+  exe = argv[0];
+  if (3 != argc)
+  {
+    usage();
+    return 1;
+  }
+  /* Reject a non-numeric COMPRESSION instead of silently treating it as 0. */
+  if (1 != sscanf (argv[2], "%u", &compression))
+  {
+    usage();
+    return 1;
+  }
+  regexes = GNUNET_REGEX_read_from_file (argv[1]);
+  if (NULL == regexes)
+  {
+    usage();
+    return 2;
+  }
+  buffer = GNUNET_REGEX_combine (regexes);
+
+  GNUNET_asprintf (&regex, "GNVPN-0001-PAD(%s)(0|1)*", buffer);
+  size = strlen (regex);
+
+  dfa = GNUNET_REGEX_construct_dfa (regex, size, compression);
+  if (NULL == dfa)
+  {
+    fprintf (stderr, "Failed to construct DFA from %s\n", argv[1]);
+    ret = 3;
+  }
+  else
+  {
+    GNUNET_REGEX_automaton_destroy (dfa);
+    ret = 0;
+  }
+  GNUNET_free (regex);
+  GNUNET_free (buffer);
+  GNUNET_REGEX_free_from_file (regexes);
+  return ret;
+}
+
+/* end of perf-regex.c */
diff --git a/src/regex/plugin_block_regex.c b/src/regex/plugin_block_regex.c
new file mode 100644
index 0000000..d3c9735
--- /dev/null
+++ b/src/regex/plugin_block_regex.c
@@ -0,0 +1,256 @@
+/*
+ This file is part of GNUnet
+ (C) 2013 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @file regex/plugin_block_regex.c
+ * @brief blocks used for regex storage and search
+ * @author Bartlomiej Polot
+ */
+
+#include "platform.h"
+#include "gnunet_block_plugin.h"
+#include "block_regex.h"
+#include "regex_block_lib.h"
+
+/**
+ * Number of bits we set per entry in the bloomfilter.
+ * Do not change!
+ */
+#define BLOOMFILTER_K 16
+
+
+/**
+ * Show debug info about outgoing edges from a block.
+ *
+ * @param cls Closure (unused).
+ * @param token Edge label (printed with an explicit length, so it need not
+ *          be 0-terminated).
+ * @param len Length of @c token.
+ * @param key Block the edge point to.
+ *
+ * @return GNUNET_YES to keep iterating.
+ */
+static int
+rdebug (void *cls,
+        const char *token,
+        size_t len,
+        const struct GNUNET_HashCode *key)
+{
+  /* The precision argument of "%.*s" must be an int; passing a size_t
+   * through varargs is undefined behavior. */
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " %s: %.*s\n",
+              GNUNET_h2s (key), (int) len, token);
+  return GNUNET_YES;
+}
+
+
+/**
+ * Check a valid reply against the (optional) bloom filter of the query and
+ * record it there.
+ *
+ * @param bf pointer to bloom filter associated with query (can be NULL);
+ *          the filter is created on demand and updated
+ * @param bf_mutator mutation value for bf
+ * @param reply_block response that has been validated
+ * @param reply_block_size number of bytes in reply block
+ * @return GNUNET_BLOCK_EVALUATION_OK_DUPLICATE if the reply is already in
+ *         the filter, GNUNET_BLOCK_EVALUATION_OK_MORE otherwise
+ */
+static enum GNUNET_BLOCK_EvaluationResult
+regex_filter_duplicate (struct GNUNET_CONTAINER_BloomFilter **bf,
+                        int32_t bf_mutator, const void *reply_block,
+                        size_t reply_block_size)
+{
+  struct GNUNET_HashCode chash;
+  struct GNUNET_HashCode mhash;
+
+  if (NULL == bf)
+    return GNUNET_BLOCK_EVALUATION_OK_MORE;
+  GNUNET_CRYPTO_hash (reply_block, reply_block_size, &chash);
+  GNUNET_BLOCK_mingle_hash (&chash, bf_mutator, &mhash);
+  if (NULL == *bf)
+    *bf = GNUNET_CONTAINER_bloomfilter_init (NULL, 8, BLOOMFILTER_K);
+  else if (GNUNET_YES == GNUNET_CONTAINER_bloomfilter_test (*bf, &mhash))
+    return GNUNET_BLOCK_EVALUATION_OK_DUPLICATE;
+  GNUNET_CONTAINER_bloomfilter_add (*bf, &mhash);
+  return GNUNET_BLOCK_EVALUATION_OK_MORE;
+}
+
+
+/**
+ * Function called to validate a reply or a request.  For
+ * request evaluation, simply pass "NULL" for the reply_block.
+ * Note that it is assumed that the reply has already been
+ * matched to the key (and signatures checked) as it would
+ * be done with the "get_key" function.
+ *
+ * @param cls closure
+ * @param type block type
+ * @param query original query (hash)
+ * @param bf pointer to bloom filter associated with query; possibly updated (!)
+ * @param bf_mutator mutation value for bf
+ * @param xquery extended query data (can be NULL, depending on type); for
+ *          GNUNET_BLOCK_TYPE_REGEX replies it must be a 0-terminated string
+ * @param xquery_size number of bytes in xquery
+ * @param reply_block response to validate
+ * @param reply_block_size number of bytes in reply block
+ * @return characterization of result
+ */
+static enum GNUNET_BLOCK_EvaluationResult
+block_plugin_regex_evaluate (void *cls, enum GNUNET_BLOCK_Type type,
+                             const struct GNUNET_HashCode * query,
+                             struct GNUNET_CONTAINER_BloomFilter **bf,
+                             int32_t bf_mutator, const void *xquery,
+                             size_t xquery_size, const void *reply_block,
+                             size_t reply_block_size)
+{
+  switch (type)
+  {
+  case GNUNET_BLOCK_TYPE_REGEX:
+    if (NULL == reply_block)
+      return GNUNET_BLOCK_EVALUATION_REQUEST_VALID;
+    if (0 == xquery_size)
+    {
+      GNUNET_break_op (0);
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Block with no xquery\n");
+      /* Only look inside the block if it is large enough to contain the
+       * fixed-size header that is read for the debug output. */
+      if (sizeof (struct RegexBlock) <= reply_block_size)
+      {
+        const struct RegexBlock *rblock = reply_block;
+
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " key: %s, %u edges\n",
+                    GNUNET_h2s (&rblock->key), ntohl (rblock->n_edges));
+        GNUNET_REGEX_block_iterate (rblock, reply_block_size, &rdebug, NULL);
+      }
+      return GNUNET_BLOCK_EVALUATION_RESULT_INVALID;
+    }
+    /* must be valid string */
+    if ( (NULL == xquery) ||
+         ('\0' != ((const char *) xquery)[xquery_size - 1]) )
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Block xquery not a valid string\n");
+      return GNUNET_BLOCK_EVALUATION_RESULT_INVALID;
+    }
+    switch (GNUNET_REGEX_block_check (reply_block,
+                                      reply_block_size,
+                                      xquery))
+    {
+    case GNUNET_SYSERR:
+      GNUNET_break_op(0);
+      return GNUNET_BLOCK_EVALUATION_RESULT_INVALID;
+    case GNUNET_NO:
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                  "BLOCK XQUERY %s not accepted\n", (const char *) xquery);
+      return GNUNET_BLOCK_EVALUATION_RESULT_IRRELEVANT;
+    default:
+      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+                  "BLOCK XQUERY %s accepted\n", (const char *) xquery);
+      break;
+    }
+    return regex_filter_duplicate (bf, bf_mutator,
+                                   reply_block, reply_block_size);
+
+
+  case GNUNET_BLOCK_TYPE_REGEX_ACCEPT:
+    if (0 != xquery_size)
+    {
+      GNUNET_break_op (0);
+      return GNUNET_BLOCK_EVALUATION_REQUEST_INVALID;
+    }
+    if (NULL == reply_block)
+      return GNUNET_BLOCK_EVALUATION_REQUEST_VALID;
+    if (sizeof (struct RegexAccept) != reply_block_size)
+    {
+      GNUNET_break_op(0);
+      return GNUNET_BLOCK_EVALUATION_RESULT_INVALID;
+    }
+    return regex_filter_duplicate (bf, bf_mutator,
+                                   reply_block, reply_block_size);
+
+
+  default:
+    return GNUNET_BLOCK_EVALUATION_TYPE_NOT_SUPPORTED;
+  }
+}
+
+
+/**
+ * Function called to obtain the key for a block.
+ *
+ * A block that is too small to contain the fixed-size header of its type is
+ * reported as an error to the caller instead of aborting the process.
+ *
+ * @param cls closure
+ * @param type block type
+ * @param block block to get the key for
+ * @param block_size number of bytes in block
+ * @param key set to the key (query) for the given block
+ * @return GNUNET_OK on success, GNUNET_SYSERR if type not supported
+ *         (or if extracting a key from a block of this type does not work)
+ */
+static int
+block_plugin_regex_get_key (void *cls, enum GNUNET_BLOCK_Type type,
+                            const void *block, size_t block_size,
+                            struct GNUNET_HashCode * key)
+{
+  switch (type)
+  {
+  case GNUNET_BLOCK_TYPE_REGEX:
+    if (sizeof (struct RegexBlock) > block_size)
+    {
+      GNUNET_break_op (0);
+      return GNUNET_SYSERR;
+    }
+    *key = ((const struct RegexBlock *) block)->key;
+    return GNUNET_OK;
+  case GNUNET_BLOCK_TYPE_REGEX_ACCEPT:
+    if (sizeof (struct RegexAccept) > block_size)
+    {
+      GNUNET_break_op (0);
+      return GNUNET_SYSERR;
+    }
+    *key = ((const struct RegexAccept *) block)->key;
+    return GNUNET_OK;
+  default:
+    GNUNET_break (0);
+    return GNUNET_SYSERR;
+  }
+}
+
+
+/**
+ * Entry point for the plugin: allocates the function table of the regex
+ * block plugin and fills in its callbacks and supported block types.
+ *
+ * @param cls closure (not used here)
+ * @return the newly allocated plugin API
+ */
+void *
+libgnunet_plugin_block_regex_init (void *cls)
+{
+  static enum GNUNET_BLOCK_Type types[] =
+  {
+    GNUNET_BLOCK_TYPE_REGEX,
+    GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
+    GNUNET_BLOCK_TYPE_ANY       /* end of list */
+  };
+  struct GNUNET_BLOCK_PluginFunctions *plugin =
+    GNUNET_malloc (sizeof (struct GNUNET_BLOCK_PluginFunctions));
+
+  plugin->types = types;
+  plugin->get_key = &block_plugin_regex_get_key;
+  plugin->evaluate = &block_plugin_regex_evaluate;
+  return plugin;
+}
+
+
+/**
+ * Exit point from the plugin: releases the function table that was
+ * allocated by libgnunet_plugin_block_regex_init().
+ *
+ * @param cls the plugin API returned by the init function
+ * @return NULL
+ */
+void *
+libgnunet_plugin_block_regex_done (void *cls)
+{
+  /* The init function hands out a GNUNET_BLOCK_PluginFunctions table. */
+  struct GNUNET_BLOCK_PluginFunctions *api = cls;
+
+  GNUNET_free (api);
+  return NULL;
+}
+
+/* end of plugin_block_regex.c */
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 5244c26..ad8e56b 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -19,417 +19,80 @@
*/
/**
* @file src/regex/regex.c
- * @brief library to create automatons from regular expressions
+ * @brief library to create Deterministic Finite Automatons (DFAs) from regular
+ * expressions (regexes).
* @author Maximilian Szengel
*/
#include "platform.h"
#include "gnunet_container_lib.h"
#include "gnunet_crypto_lib.h"
#include "gnunet_regex_lib.h"
-#include "regex.h"
-
-#define initial_bits 10
-
-/**
- * Context that contains an id counter for states and transitions as well as a
- * DLL of automatons used as a stack for NFA construction.
- */
-struct GNUNET_REGEX_Context
-{
- /**
- * Unique state id.
- */
- unsigned int state_id;
-
- /**
- * Unique transition id.
- */
- unsigned int transition_id;
-
- /**
- * Unique SCC (Strongly Connected Component) id.
- */
- unsigned int scc_id;
-
- /**
- * DLL of GNUNET_REGEX_Automaton's used as a stack.
- */
- struct GNUNET_REGEX_Automaton *stack_head;
-
- /**
- * DLL of GNUNET_REGEX_Automaton's used as a stack.
- */
- struct GNUNET_REGEX_Automaton *stack_tail;
-};
+#include "regex_internal.h"
/**
- * Type of an automaton.
+ * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA
+ * creation. Disabled by default for better performance.
*/
-enum GNUNET_REGEX_automaton_type
-{
- NFA,
- DFA
-};
+#define REGEX_DEBUG_DFA GNUNET_NO
/**
- * Automaton representation.
+ * Set of states using MDLL API.
*/
-struct GNUNET_REGEX_Automaton
+struct GNUNET_REGEX_StateSet_MDLL
{
/**
- * This is a linked list.
- */
- struct GNUNET_REGEX_Automaton *prev;
-
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_Automaton *next;
-
- /**
- * First state of the automaton. This is mainly used for constructing an NFA,
- * where each NFA itself consists of one or more NFAs linked together.
- */
- struct GNUNET_REGEX_State *start;
-
- /**
- * End state of the automaton.
- */
- struct GNUNET_REGEX_State *end;
-
- /**
- * Number of states in the automaton.
+ * MDLL of states.
*/
- unsigned int state_count;
+ struct GNUNET_REGEX_State *head;
/**
- * DLL of states.
+ * MDLL of states.
*/
- struct GNUNET_REGEX_State *states_head;
+ struct GNUNET_REGEX_State *tail;
/**
- * DLL of states
- */
- struct GNUNET_REGEX_State *states_tail;
-
- /**
- * Type of the automaton.
- */
- enum GNUNET_REGEX_automaton_type type;
-};
-
-/**
- * A state. Can be used in DFA and NFA automatons.
- */
-struct GNUNET_REGEX_State
-{
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_State *prev;
-
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_State *next;
-
- /**
- * Unique state id.
- */
- unsigned int id;
-
- /**
- * If this is an accepting state or not.
- */
- int accepting;
-
- /**
- * Marking of the state. This is used for marking all visited states when
- * traversing all states of an automaton and for cases where the state id
- * cannot be used (dfa minimization).
- */
- int marked;
-
- /**
- * Marking the state as contained. This is used for checking, if the state is
- * contained in a set in constant time
- */
- int contained;
-
- /**
- * Marking the state as part of an SCC (Strongly Connected Component). All
- * states with the same scc_id are part of the same SCC. scc_id is 0, if state
- * is not a part of any SCC.
- */
- unsigned int scc_id;
-
- /**
- * Used for SCC detection.
- */
- int index;
-
- /**
- * Used for SCC detection.
- */
- int lowlink;
-
- /**
- * Human readable name of the automaton. Used for debugging and graph
- * creation.
- */
- char *name;
-
- /**
- * Hash of the state.
- */
- GNUNET_HashCode hash;
-
- /**
- * Proof for this state.
- */
- char *proof;
-
- /**
- * Number of transitions from this state to other states.
- */
- unsigned int transition_count;
-
- /**
- * DLL of transitions.
- */
- struct Transition *transitions_head;
-
- /**
- * DLL of transitions.
- */
- struct Transition *transitions_tail;
-
- /**
- * Set of states on which this state is based on. Used when creating a DFA out
- * of several NFA states.
- */
- struct GNUNET_REGEX_StateSet *nfa_set;
-};
-
-/**
- * Transition between two states. Each state can have 0-n transitions. If label
- * is 0, this is considered to be an epsilon transition.
- */
-struct Transition
-{
- /**
- * This is a linked list.
- */
- struct Transition *prev;
-
- /**
- * This is a linked list.
- */
- struct Transition *next;
-
- /**
- * Unique id of this transition.
- */
- unsigned int id;
-
- /**
- * Label for this transition. This is basically the edge label for the graph.
- */
- char label;
-
- /**
- * State to which this transition leads.
- */
- struct GNUNET_REGEX_State *to_state;
-
- /**
- * State from which this transition origins.
- */
- struct GNUNET_REGEX_State *from_state;
-
- /**
- * Mark this transition. For example when reversing the automaton.
- */
- int mark;
-};
-
-/**
- * Set of states.
- */
-struct GNUNET_REGEX_StateSet
-{
- /**
- * Array of states.
- */
- struct GNUNET_REGEX_State **states;
-
- /**
- * Length of the 'states' array.
+ * Length of the MDLL.
*/
unsigned int len;
};
-/*
- * Debug helper functions
- */
-void
-debug_print_transitions (struct GNUNET_REGEX_State *);
-
-void
-debug_print_state (struct GNUNET_REGEX_State *s)
-{
- char *proof;
-
- if (NULL == s->proof)
- proof = "NULL";
- else
- proof = s->proof;
-
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
- "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i proof: %s\n",
- s->id, s->name, s->marked, s->accepting, s->scc_id,
- s->transition_count, proof);
-
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transitions:\n");
- debug_print_transitions (s);
-}
-
-void
-debug_print_states (struct GNUNET_REGEX_Automaton *a)
-{
- struct GNUNET_REGEX_State *s;
-
- for (s = a->states_head; NULL != s; s = s->next)
- debug_print_state (s);
-}
-
-void
-debug_print_transition (struct Transition *t)
-{
- char *to_state;
- char *from_state;
- char label;
-
- if (NULL == t)
- return;
-
- if (0 == t->label)
- label = '0';
- else
- label = t->label;
-
- if (NULL == t->to_state)
- to_state = "NULL";
- else
- to_state = t->to_state->name;
-
- if (NULL == t->from_state)
- from_state = "NULL";
- else
- from_state = t->from_state->name;
-
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Transition %i: From %s on %c to %s\n",
- t->id, from_state, label, to_state);
-}
-
-void
-debug_print_transitions (struct GNUNET_REGEX_State *s)
-{
- struct Transition *t;
-
- for (t = s->transitions_head; NULL != t; t = t->next)
- debug_print_transition (t);
-}
/**
- * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside
- * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all
- * SCCs inside an automaton.
+ * Append state to the given StateSet 'set'.
*
- * @param ctx context
- * @param v start vertex
- * @param index current index
- * @param stack stack for saving all SCCs
- * @param stack_size current size of the stack
+ * @param set set to be modified
+ * @param state state to be appended
*/
static void
-scc_tarjan_strongconnect (struct GNUNET_REGEX_Context *ctx,
- struct GNUNET_REGEX_State *v, int *index,
- struct GNUNET_REGEX_State **stack,
- unsigned int *stack_size)
+state_set_append (struct GNUNET_REGEX_StateSet *set,
+ struct GNUNET_REGEX_State *state)
{
- struct GNUNET_REGEX_State *w;
- struct Transition *t;
-
- v->index = *index;
- v->lowlink = *index;
- (*index)++;
- stack[(*stack_size)++] = v;
- v->contained = 1;
-
- for (t = v->transitions_head; NULL != t; t = t->next)
- {
- w = t->to_state;
- if (NULL != w && w->index < 0)
- {
- scc_tarjan_strongconnect (ctx, w, index, stack, stack_size);
- v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink;
- }
- else if (0 != w->contained)
- v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink;
- }
-
- if (v->lowlink == v->index)
- {
- w = stack[--(*stack_size)];
- w->contained = 0;
-
- if (v != w)
- {
- ctx->scc_id++;
- while (v != w)
- {
- w->scc_id = ctx->scc_id;
- w = stack[--(*stack_size)];
- w->contained = 0;
- }
- w->scc_id = ctx->scc_id;
- }
- }
+ if (set->off == set->size)
+ GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
+ set->states[set->off++] = state;
}
+
/**
- * Detect all SCCs (Strongly Connected Components) inside the given automaton.
- * SCCs will be marked using the scc_id on each state.
+ * Compare two strings for equality. If exactly one of them is NULL they are
+ * not equal; two NULL strings are considered equal.
*
- * @param ctx context
- * @param a automaton
+ * @param str1 first string for comparison.
+ * @param str2 second string for comparison.
+ *
+ * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
*/
-static void
-scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a)
+static int
+nullstrcmp (const char *str1, const char *str2)
{
- int index;
- struct GNUNET_REGEX_State *v;
- struct GNUNET_REGEX_State *stack[a->state_count];
- unsigned int stack_size;
-
- for (v = a->states_head; NULL != v; v = v->next)
- {
- v->contained = 0;
- v->index = -1;
- v->lowlink = -1;
- }
-
- stack_size = 0;
- index = 0;
+ if ((NULL == str1) != (NULL == str2))
+ return -1;
+ if ((NULL == str1) && (NULL == str2))
+ return 0;
- for (v = a->states_head; NULL != v; v = v->next)
- {
- if (v->index < 0)
- scc_tarjan_strongconnect (ctx, v, &index, stack, &stack_size);
- }
+ return strcmp (str1, str2);
}
+
/**
* Adds a transition from one state to another on 'label'. Does not add
* duplicate states.
@@ -441,11 +104,11 @@ scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a)
*/
static void
state_add_transition (struct GNUNET_REGEX_Context *ctx,
- struct GNUNET_REGEX_State *from_state, const char label,
+ struct GNUNET_REGEX_State *from_state, const char *label,
struct GNUNET_REGEX_State *to_state)
{
- int is_dup;
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *oth;
if (NULL == from_state)
{
@@ -453,33 +116,64 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
return;
}
- // Do not add duplicate state transitions
- is_dup = GNUNET_NO;
+ /* Do not add duplicate state transitions */
for (t = from_state->transitions_head; NULL != t; t = t->next)
{
- if (t->to_state == to_state && t->label == label &&
+ if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) &&
t->from_state == from_state)
- {
- is_dup = GNUNET_YES;
- break;
- }
+ return;
}
- if (is_dup)
- return;
+ /* sort transitions by label */
+ for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
+ {
+ if (0 < nullstrcmp (oth->label, label))
+ break;
+ }
- t = GNUNET_malloc (sizeof (struct Transition));
- t->id = ctx->transition_id++;
- t->label = label;
+ t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
+ if (NULL != ctx)
+ t->id = ctx->transition_id++;
+ if (NULL != label)
+ t->label = GNUNET_strdup (label);
+ else
+ t->label = NULL;
t->to_state = to_state;
t->from_state = from_state;
- // Add outgoing transition to 'from_state'
+ /* Add outgoing transition to 'from_state' */
from_state->transition_count++;
- GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
- from_state->transitions_tail, t);
+ GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
+ from_state->transitions_tail, oth, t);
}
+
+/**
+ * Detach 'transition' from 'state' and release it together with its label.
+ * Nothing happens if either argument is NULL or if the transition does not
+ * originate from 'state'.
+ *
+ * @param state state from which the to-be-removed transition originates.
+ * @param transition transition that should be removed from state 'state'.
+ */
+static void
+state_remove_transition (struct GNUNET_REGEX_State *state,
+                         struct GNUNET_REGEX_Transition *transition)
+{
+  if ( (NULL == state) ||
+       (NULL == transition) ||
+       (state != transition->from_state) )
+    return;
+
+  /* unlink from the state's transition list first, then release memory */
+  GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail,
+                               transition);
+  state->transition_count--;
+
+  GNUNET_free_non_null (transition->label);
+  GNUNET_free (transition);
+}
+
+
/**
* Compare two states. Used for sorting.
*
@@ -493,27 +187,26 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
static int
state_compare (const void *a, const void *b)
{
- struct GNUNET_REGEX_State **s1;
- struct GNUNET_REGEX_State **s2;
-
- s1 = (struct GNUNET_REGEX_State **) a;
- s2 = (struct GNUNET_REGEX_State **) b;
+ struct GNUNET_REGEX_State **s1 = (struct GNUNET_REGEX_State **) a;
+ struct GNUNET_REGEX_State **s2 = (struct GNUNET_REGEX_State **) b;
return (*s1)->id - (*s2)->id;
}
+
/**
* Get all edges leaving state 's'.
*
* @param s state.
- * @param edges all edges leaving 's'.
+ * @param edges all edges leaving 's', expected to be allocated and have enough
+ * space for s->transition_count elements.
*
* @return number of edges.
*/
static unsigned int
state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
unsigned int count;
if (NULL == s)
@@ -525,7 +218,7 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
{
if (NULL != t->to_state)
{
- edges[count].label = &t->label;
+ edges[count].label = t->label;
edges[count].destination = t->to_state->hash;
count++;
}
@@ -533,39 +226,37 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
return count;
}
+
/**
* Compare to state sets by comparing the id's of the states that are contained
* in each set. Both sets are expected to be sorted by id!
*
* @param sset1 first state set
* @param sset2 second state set
- *
- * @return an integer less than, equal to, or greater than zero
- * if the first argument is considered to be respectively
- * less than, equal to, or greater than the second.
+ * @return 0 if the sets are equal, otherwise non-zero
*/
static int
state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
struct GNUNET_REGEX_StateSet *sset2)
{
int result;
- int i;
+ unsigned int i;
if (NULL == sset1 || NULL == sset2)
return 1;
- result = sset1->len - sset2->len;
-
- for (i = 0; i < sset1->len; i++)
- {
- if (0 != result)
+ result = sset1->off - sset2->off;
+ if (result < 0)
+ return -1;
+ if (result > 0)
+ return 1;
+ for (i = 0; i < sset1->off; i++)
+ if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
break;
-
- result = state_compare (&sset1->states[i], &sset2->states[i]);
- }
return result;
}
+
/**
* Clears the given StateSet 'set'
*
@@ -574,14 +265,11 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
static void
state_set_clear (struct GNUNET_REGEX_StateSet *set)
{
- if (NULL != set)
- {
- if (NULL != set->states)
- GNUNET_free (set->states);
- GNUNET_free (set);
- }
+ GNUNET_array_grow (set->states, set->size, 0);
+ set->off = 0;
}
+
/**
* Clears an automaton fragment. Does not destroy the states inside the
* automaton.
@@ -602,6 +290,7 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
GNUNET_free (a);
}
+
/**
* Frees the memory used by State 's'
*
@@ -610,30 +299,25 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
static void
automaton_destroy_state (struct GNUNET_REGEX_State *s)
{
- struct Transition *t;
- struct Transition *next_t;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *next_t;
if (NULL == s)
return;
- if (NULL != s->name)
- GNUNET_free (s->name);
-
- if (NULL != s->proof)
- GNUNET_free (s->proof);
-
+ GNUNET_free_non_null (s->name);
+ GNUNET_free_non_null (s->proof);
+ state_set_clear (&s->nfa_set);
for (t = s->transitions_head; NULL != t; t = next_t)
{
next_t = t->next;
- GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
- GNUNET_free (t);
+ state_remove_transition (s, t);
}
- state_set_clear (s->nfa_set);
-
GNUNET_free (s);
}
+
/**
* Remove a state from the given automaton 'a'. Always use this function when
* altering the states of an automaton. Will also remove all transitions leading
@@ -646,39 +330,36 @@ static void
automaton_remove_state (struct GNUNET_REGEX_Automaton *a,
struct GNUNET_REGEX_State *s)
{
- struct GNUNET_REGEX_State *ss;
struct GNUNET_REGEX_State *s_check;
- struct Transition *t_check;
+ struct GNUNET_REGEX_Transition *t_check;
+ struct GNUNET_REGEX_Transition *t_check_next;
if (NULL == a || NULL == s)
return;
- // remove state
- ss = s;
- GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
- a->state_count--;
-
- // remove all transitions leading to this state
+ /* remove all transitions leading to this state */
for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
{
for (t_check = s_check->transitions_head; NULL != t_check;
- t_check = t_check->next)
+ t_check = t_check_next)
{
- if (t_check->to_state == ss)
- {
- GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
- s_check->transitions_tail, t_check);
- s_check->transition_count--;
- }
+ t_check_next = t_check->next;
+ if (t_check->to_state == s)
+ state_remove_transition (s_check, t_check);
}
}
- automaton_destroy_state (ss);
+ /* remove state */
+ GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
+ a->state_count--;
+
+ automaton_destroy_state (s);
}
+
/**
* Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
- * 's2'.
+ * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
*
* @param ctx context
* @param a automaton
@@ -692,48 +373,61 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
struct GNUNET_REGEX_State *s2)
{
struct GNUNET_REGEX_State *s_check;
- struct Transition *t_check;
- char *new_name;
-
- GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
+ struct GNUNET_REGEX_Transition *t_check;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *t_next;
+ int is_dup;
if (s1 == s2)
return;
- // 1. Make all transitions pointing to s2 point to s1
+ /* 1. Make all transitions pointing to s2 point to s1, unless an equivalent
+ * transition to s1 already exists; in that case remove the transition. */
for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
{
- for (t_check = s_check->transitions_head; NULL != t_check;
- t_check = t_check->next)
+ for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
{
+ t_next = t_check->next;
+
if (s2 == t_check->to_state)
- t_check->to_state = s1;
+ {
+ is_dup = GNUNET_NO;
+ for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
+ {
+ if (t->to_state == s1 && 0 == strcmp (t_check->label, t->label))
+ is_dup = GNUNET_YES;
+ }
+ if (GNUNET_NO == is_dup)
+ t_check->to_state = s1;
+ else
+ state_remove_transition (t_check->from_state, t_check);
+ }
}
}
- // 2. Add all transitions from s2 to sX to s1
+ /* 2. Add all transitions from s2 to sX to s1 */
for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
{
if (t_check->to_state != s1)
state_add_transition (ctx, s1, t_check->label, t_check->to_state);
}
- // 3. Rename s1 to {s1,s2}
- new_name = GNUNET_strdup (s1->name);
- if (NULL != s1->name)
- {
- GNUNET_free (s1->name);
- s1->name = NULL;
- }
+ /* 3. Rename s1 to {s1,s2} */
+#if REGEX_DEBUG_DFA
+ char *new_name;
+
+ new_name = s1->name;
GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
GNUNET_free (new_name);
+#endif
- // remove state
+ /* remove state */
GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
a->state_count--;
automaton_destroy_state (s2);
}
+
/**
* Add a state to the automaton 'a', always use this function to alter the
* states DLL of the automaton.
@@ -749,154 +443,1316 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a,
a->state_count++;
}
-/**
- * Function that is called with each state, when traversing an automaton.
- *
- * @param cls closure
- * @param s state
- */
-typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
- struct GNUNET_REGEX_State * s);
/**
- * Traverses all states that are reachable from state 's'. Expects the states to
- * be unmarked (s->marked == GNUNET_NO). Performs 'action' on each visited
- * state.
+ * Depth-first traversal (DFS) of all states that are reachable from state
+ * 's'. Performs 'action' on each visited state.
*
- * @param cls closure.
* @param s start state.
+ * @param marks an array of size a->state_count to remember which state was
+ * already visited.
+ * @param count current count of the state.
+ * @param check function that is checked before advancing on each transition
+ * in the DFS.
+ * @param check_cls closure for check.
* @param action action to be performed on each state.
+ * @param action_cls closure for action.
*/
static void
-automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s,
- GNUNET_REGEX_traverse_action action)
+automaton_state_traverse (struct GNUNET_REGEX_State *s, int *marks,
+ unsigned int *count,
+ GNUNET_REGEX_traverse_check check, void *check_cls,
+ GNUNET_REGEX_traverse_action action, void *action_cls)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
+
+ if (GNUNET_YES == marks[s->traversal_id])
+ return;
- if (GNUNET_NO == s->marked)
+ marks[s->traversal_id] = GNUNET_YES;
+
+ if (NULL != action)
+ action (action_cls, *count, s);
+
+ (*count)++;
+
+ for (t = s->transitions_head; NULL != t; t = t->next)
{
- s->marked = GNUNET_YES;
+ if (NULL == check ||
+ (NULL != check && GNUNET_YES == check (check_cls, s, t)))
+ {
+ automaton_state_traverse (t->to_state, marks, count, check, check_cls,
+ action, action_cls);
+ }
+ }
+}
- if (action > 0)
- action (cls, s);
- for (t = s->transitions_head; NULL != t; t = t->next)
- automaton_state_traverse (cls, t->to_state, action);
+/**
+ * Traverses the given automaton using depth-first-search (DFS) from its start
+ * state, visiting all reachable states and calling 'action' on each one of
+ * them. Does nothing if 'a' is NULL or contains no states.
+ *
+ * @param a automaton to be traversed.
+ * @param start start state, pass a->start or NULL to traverse the whole automaton.
+ * @param check function that is checked before advancing on each transition
+ * in the DFS; if NULL, every transition is followed.
+ * @param check_cls closure for check.
+ * @param action action to be performed on each state; may be NULL.
+ * @param action_cls closure for action
+ */
+void
+GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
+ struct GNUNET_REGEX_State *start,
+ GNUNET_REGEX_traverse_check check,
+ void *check_cls,
+ GNUNET_REGEX_traverse_action action,
+ void *action_cls)
+{
+ unsigned int count;
+ struct GNUNET_REGEX_State *s;
+
+ if (NULL == a || 0 == a->state_count)
+ return;
+
+ /* one "visited" flag per state, indexed by the state's traversal_id */
+ int marks[a->state_count];
+
+ /* number the states consecutively and reset all "visited" flags */
+ for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
+ s = s->next, count++)
+ {
+ s->traversal_id = count;
+ marks[s->traversal_id] = GNUNET_NO;
}
+
+ /* from here on 'count' is the running number of visited states */
+ count = 0;
+
+ if (NULL == start)
+ s = a->start;
+ else
+ s = start;
+
+ automaton_state_traverse (s, marks, &count, check, check_cls, action,
+ action_cls);
}
+
/**
- * Traverses the given automaton from it's start state, visiting all reachable
- * states and calling 'action' on each one of them.
+ * String container for faster string operations.
+ */
+struct StringBuffer
+{
+ /**
+ * Buffer holding the string (may start in the middle!);
+ * NOT 0-terminated! Only the first 'slen' bytes are string data.
+ */
+ char *sbuf;
+
+ /**
+ * Allocated buffer; this is the pointer that gets freed.
+ */
+ char *abuf;
+
+ /**
+ * Length of the string in the buffer.
+ */
+ size_t slen;
+
+ /**
+ * Number of bytes allocated for 'abuf'
+ */
+ unsigned int blen;
+
+ /**
+ * Buffer currently represents "NULL" (not the empty string!)
+ */
+ int16_t null_flag;
+
+ /**
+ * If this entry is part of the last/current generation array,
+ * this flag is GNUNET_YES if the last and current generation are
+ * identical (and thus copying is unnecessary if the value didn't
+ * change). This is used in an optimization that improves
+ * performance by about 1% --- if we use int16_t here. With just
+ * "int" for both flags, performance drops (on my system) significantly,
+ * most likely due to increased cache misses.
+ */
+ int16_t synced;
+
+};
+
+
+/**
+ * Compare two string buffers for equality. If exactly one of them represents
+ * NULL they are not equal; two NULL buffers are considered equal.
*
- * @param cls closure.
- * @param a automaton.
- * @param action action to be performed on each state.
+ * @param s1 first string for comparison.
+ * @param s2 second string for comparison.
+ *
+ * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
+ */
+static int
+sb_nullstrcmp (const struct StringBuffer *s1,
+               const struct StringBuffer *s2)
+{
+  const int first_is_null = (GNUNET_YES == s1->null_flag);
+  const int second_is_null = (GNUNET_YES == s2->null_flag);
+
+  /* two NULL buffers are equal, a NULL and a non-NULL buffer are not */
+  if (first_is_null || second_is_null)
+    return (first_is_null && second_is_null) ? 0 : -1;
+  /* different lengths can never match, no need to look at the bytes */
+  if (s1->slen != s2->slen)
+    return -1;
+  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
+}
+
+
+/**
+ * Compare the contents of two string buffers for equality.  The
+ * 'null_flag' of the buffers is not consulted.
+ *
+ * @param s1 first string for comparison.
+ * @param s2 second string for comparison.
+ *
+ * @return 0 if both buffers have the same length and the same bytes,
+ *         non-zero if not.
+ */
+static int
+sb_strcmp (const struct StringBuffer *s1,
+           const struct StringBuffer *s2)
+{
+  return (s1->slen == s2->slen)
+    ? memcmp (s1->sbuf, s2->sbuf, s1->slen)
+    : -1;
+}
+
+
+/**
+ * Reallocate the buffer of 'ret' to fit 'nlen' characters;
+ * move the existing string to the beginning of the new buffer.
+ *
+ * @param ret current buffer, to be updated
+ * @param nlen target length for the buffer, must be at least ret->slen
*/
static void
-automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a,
- GNUNET_REGEX_traverse_action action)
+sb_realloc (struct StringBuffer *ret,
+ size_t nlen)
{
- struct GNUNET_REGEX_State *s;
+ char *old;
+
+ GNUNET_assert (nlen >= ret->slen);
+ old = ret->abuf;
+ ret->abuf = GNUNET_malloc (nlen);
+ ret->blen = nlen;
+ memcpy (ret->abuf,
+ ret->sbuf,
+ ret->slen);
+ ret->sbuf = ret->abuf;
+ GNUNET_free_non_null (old);
+}
+
- for (s = a->states_head; NULL != s; s = s->next)
- s->marked = GNUNET_NO;
+/**
+ * Append the contents of one string buffer to another.  A target that
+ * currently represents "NULL" is treated as the empty string.
+ *
+ * @param ret where to write the result
+ * @param sarg string to append
+ */
+static void
+sb_append (struct StringBuffer *ret,
+           const struct StringBuffer *sarg)
+{
+  size_t required;
+
+  if (GNUNET_YES == ret->null_flag)
+  {
+    ret->slen = 0;
+  }
+  ret->null_flag = GNUNET_NO;
+
+  required = sarg->slen + ret->slen;
+  if (ret->blen < required)
+    sb_realloc (ret, ret->blen + sarg->slen + 128); /* grow with some slack */
+
+  memcpy (ret->sbuf + ret->slen,
+          sarg->sbuf,
+          sarg->slen);
+  ret->slen += sarg->slen;
+}
+
- automaton_state_traverse (cls, a->start, action);
+/**
+ * Append a 0-terminated C string (without its terminator) to a string
+ * buffer.  A target that currently represents "NULL" is treated as the
+ * empty string.
+ *
+ * @param ret where to write the result
+ * @param cstr string to append
+ */
+static void
+sb_append_cstr (struct StringBuffer *ret,
+                const char *cstr)
+{
+  const size_t add_len = strlen (cstr);
+  size_t required;
+
+  if (GNUNET_YES == ret->null_flag)
+  {
+    ret->slen = 0;
+  }
+  ret->null_flag = GNUNET_NO;
+
+  required = add_len + ret->slen;
+  if (ret->blen < required)
+    sb_realloc (ret, ret->blen + add_len + 128); /* grow with some slack */
+
+  memcpy (ret->sbuf + ret->slen,
+          cstr,
+          add_len);
+  ret->slen += add_len;
+}
+
+
+/**
+ * Wrap a string buffer, that is, set ret to the format string
+ * which contains an "%.*s" which is to be replaced with the original
+ * content of 'ret'. Note that optimizing this function is not
+ * really worth it, it is rarely called.
+ *
+ * @param ret where to write the result and take the input for %.*s from
+ * @param format format string, fprintf-style, with exactly one "%.*s"
+ * @param extra_chars how long will the result be, in addition to the
+ * original length of 'ret'
+ */
+static void
+sb_wrap (struct StringBuffer *ret,
+ const char *format,
+ size_t extra_chars)
+{
+ char *temp;
+
+ /* a buffer representing "NULL" is wrapped like the empty string */
+ if (GNUNET_YES == ret->null_flag)
+ ret->slen = 0;
+ ret->null_flag = GNUNET_NO;
+ /* one extra byte is reserved in addition to the resulting string */
+ temp = GNUNET_malloc (ret->slen + extra_chars + 1);
+ GNUNET_snprintf (temp,
+ ret->slen + extra_chars + 1,
+ format,
+ (int) ret->slen,
+ ret->sbuf);
+ /* the old allocation is released only after it was used as input above */
+ GNUNET_free_non_null (ret->abuf);
+ ret->abuf = temp;
+ ret->sbuf = temp;
+ ret->blen = ret->slen + extra_chars + 1;
+ ret->slen = ret->slen + extra_chars;
}
+
 /**
- * Reverses all transitions of the given automaton.
+ * Format a string buffer: print 'sarg' into 'format' and store the result
+ * in 'ret'. Note that optimizing this function is not really worth it, it
+ * is rarely called.
 *
- * @param a automaton.
+ * @param ret where to write the result (written from the start of its allocation)
+ * @param format format string, fprintf-style, with exactly one "%.*s"
+ * @param extra_chars how long will the result be, in addition to 'sarg' length
+ * @param sarg string to print into the format
 */
 static void
-automaton_reverse (struct GNUNET_REGEX_Automaton *a)
+sb_printf1 (struct StringBuffer *ret,
+ const char *format,
+ size_t extra_chars,
+ const struct StringBuffer *sarg)
 {
- struct GNUNET_REGEX_State *s;
- struct Transition *t;
- struct Transition *t_next;
- struct GNUNET_REGEX_State *s_swp;
+ /* the result needs 'sarg', the literal characters and a terminating NUL */
+ if (ret->blen < sarg->slen + extra_chars + 1)
+ sb_realloc (ret,
+ sarg->slen + extra_chars + 1);
+ ret->null_flag = GNUNET_NO;
+ /* write from the beginning of the allocated buffer */
+ ret->sbuf = ret->abuf;
+ ret->slen = sarg->slen + extra_chars;
+ GNUNET_snprintf (ret->sbuf,
+ ret->blen,
+ format,
+ (int) sarg->slen,
+ sarg->sbuf);
+}
- for (s = a->states_head; NULL != s; s = s->next)
- for (t = s->transitions_head; NULL != t; t = t->next)
- t->mark = GNUNET_NO;
- for (s = a->states_head; NULL != s; s = s->next)
+/**
+ * Print two string buffers into 'ret' according to a format string.
+ *
+ * @param ret buffer receiving the formatted text, written from the start
+ *        of its allocation
+ * @param format fprintf-style format string with exactly two "%.*s"
+ * @param extra_chars number of characters of the result in addition to
+ *        the combined length of 'sarg1' and 'sarg2'
+ * @param sarg1 string substituted for the first "%.*s"
+ * @param sarg2 string substituted for the second "%.*s"
+ */
+static void
+sb_printf2 (struct StringBuffer *ret,
+            const char *format,
+            size_t extra_chars,
+            const struct StringBuffer *sarg1,
+            const struct StringBuffer *sarg2)
+{
+  /* both arguments, the literal characters and the terminating NUL */
+  const size_t needed = sarg1->slen + sarg2->slen + extra_chars + 1;
+
+  if (needed > ret->blen)
+    sb_realloc (ret, needed);
+  ret->sbuf = ret->abuf;
+  ret->slen = needed - 1;
+  ret->null_flag = GNUNET_NO;
+  GNUNET_snprintf (ret->sbuf,
+                   ret->blen,
+                   format,
+                   (int) sarg1->slen, sarg1->sbuf,
+                   (int) sarg2->slen, sarg2->sbuf);
+}
+
+
+/**
+ * Print three string buffers into 'ret' according to a format string.
+ * Note that optimizing this function is not really worth it, it is
+ * rarely called.
+ *
+ * @param ret buffer receiving the formatted text, written from the start
+ *        of its allocation
+ * @param format fprintf-style format string with exactly three "%.*s"
+ * @param extra_chars number of characters of the result in addition to
+ *        the combined length of 'sarg1', 'sarg2' and 'sarg3'
+ * @param sarg1 string substituted for the first "%.*s"
+ * @param sarg2 string substituted for the second "%.*s"
+ * @param sarg3 string substituted for the third "%.*s"
+ */
+static void
+sb_printf3 (struct StringBuffer *ret,
+            const char *format,
+            size_t extra_chars,
+            const struct StringBuffer *sarg1,
+            const struct StringBuffer *sarg2,
+            const struct StringBuffer *sarg3)
+{
+  /* all arguments, the literal characters and the terminating NUL */
+  const size_t needed =
+    sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1;
+
+  if (needed > ret->blen)
+    sb_realloc (ret, needed);
+  ret->sbuf = ret->abuf;
+  ret->slen = needed - 1;
+  ret->null_flag = GNUNET_NO;
+  GNUNET_snprintf (ret->sbuf,
+                   ret->blen,
+                   format,
+                   (int) sarg1->slen, sarg1->sbuf,
+                   (int) sarg2->slen, sarg2->sbuf,
+                   (int) sarg3->slen, sarg3->sbuf);
+}
+
+
+/**
+ * Release the storage of a string buffer and reset it to the "null" state.
+ *
+ * @param sb buffer whose storage is released; the struct itself is not
+ *        freed, as buffers should not be individually allocated
+ */
+static void
+sb_free (struct StringBuffer *sb)
+{
+  /* resize the allocation to zero elements; this also updates 'blen' */
+  GNUNET_array_grow (sb->abuf, sb->blen, 0);
+  sb->sbuf = NULL;
+  sb->slen = 0;
+  sb->null_flag = GNUNET_YES;
+}
+
+
+/**
+ * Copy the given string buffer from 'in' to 'out'. If 'in' is flagged as
+ * "null", only the flag is propagated and the storage of 'out' is left as is.
+ *
+ * @param out output string, its allocation is grown if it is too small
+ * @param in input string
+ */
+static void
+sb_strdup (struct StringBuffer *out,
+ const struct StringBuffer *in)
+
+{
+ out->null_flag = in->null_flag;
+ if (GNUNET_YES == out->null_flag)
+ return;
+ if (out->blen < in->slen)
 {
- for (t = s->transitions_head; NULL != t; t = t_next)
+ GNUNET_array_grow (out->abuf,
+ out->blen,
+ in->slen);
+ }
+ /* the copy always starts at the beginning of the allocation */
+ out->sbuf = out->abuf;
+ out->slen = in->slen;
+ memcpy (out->sbuf, in->sbuf, out->slen);
+}
+
+
+/**
+ * Copy a C string into the string buffer 'out'. A NULL 'cstr' only flags
+ * 'out' as "null" and leaves its storage untouched.
+ *
+ * @param out buffer receiving the copy (the terminating NUL is not copied)
+ * @param cstr NUL-terminated input string, may be NULL
+ */
+static void
+sb_strdup_cstr (struct StringBuffer *out,
+                const char *cstr)
+{
+  size_t len;
+
+  if (NULL == cstr)
+  {
+    out->null_flag = GNUNET_YES;
+    return;
+  }
+  len = strlen (cstr);
+  out->null_flag = GNUNET_NO;
+  out->slen = len;
+  if (len > out->blen)
+  {
+    GNUNET_array_grow (out->abuf,
+                       out->blen,
+                       len);
+  }
+  /* the copy always starts at the beginning of the allocation */
+  out->sbuf = out->abuf;
+  memcpy (out->sbuf, cstr, len);
+}
+
+
+/**
+ * Check if the given string 'str' needs parentheses around it when
+ * using it to generate a regex. A string needs no parentheses if it is
+ * "null", shorter than two characters, or consists of exactly one
+ * parenthesized group (the ')' matching the leading '(' is the last
+ * character of the string).
+ *
+ * @param str string, described by 'sbuf' and 'slen' (not NUL-terminated)
+ *
+ * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
+ */
+static int
+needs_parentheses (const struct StringBuffer *str)
+{
+ size_t slen;
+ const char *op;
+ const char *cl;
+ const char *pos;
+ const char *end;
+ unsigned int cnt;
+
+ if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
+ return GNUNET_NO;
+ pos = str->sbuf;
+ if ('(' != pos[0])
+ return GNUNET_YES;
+ end = str->sbuf + slen;
+ /* 'cnt' is the number of currently open parentheses */
+ cnt = 1;
+ pos++;
+ while (cnt > 0)
+ {
+ cl = memchr (pos, ')', end - pos);
+ if (NULL == cl)
 {
- t_next = t->next;
+ /* unbalanced parentheses, should never happen */
+ GNUNET_break (0);
+ return GNUNET_YES;
+ }
+ /* while '(' before ')', count opening parens */
+ while ( (NULL != (op = memchr (pos, '(', end - pos))) &&
+ (op < cl) )
+ {
+ cnt++;
+ pos = op + 1;
+ }
+ /* got ')' first */
+ cnt--;
+ pos = cl + 1;
+ }
+ /* The leading group is closed; it spans the whole string only if we are
+ * at the end. Compare against 'end' rather than looking for a NUL byte:
+ * string buffers are not NUL-terminated, so '*pos' may be out of bounds
+ * or stale data (e.g. the ')' left behind by remove_parentheses). */
+ return (pos == end) ? GNUNET_NO : GNUNET_YES;
+}
- if (GNUNET_YES == t->mark || t->from_state == t->to_state)
- continue;
- t->mark = GNUNET_YES;
+/**
+ * Remove parentheses surrounding string 'str', in place.
+ * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
+ * Nothing is copied or freed: on success 'str->sbuf' is advanced by one
+ * character and 'str->slen' is reduced by two; 'str->abuf' is not touched.
+ *
+ * @param str string to strip; left unmodified if it is "null", shorter
+ *        than two characters, or not enclosed in a single matching pair
+ *        of parentheses
+ */
+static void
+remove_parentheses (struct StringBuffer *str)
+{
+  size_t slen;
+  const char *pos;
+  const char *end;
+  const char *sbuf;
+  const char *op;
+  const char *cp;
+  unsigned int cnt;
+
+  sbuf = str->sbuf;
+  if ( (GNUNET_YES == str->null_flag) ||
+       (1 >= (slen = str->slen)) ||
+       ('(' != str->sbuf[0]) ||
+       (')' != str->sbuf[slen - 1]) )
+    return;
+  /* scan the interior (without the outer pair) and make sure that its
+   * parentheses are balanced on their own */
+  cnt = 0;
+  pos = &sbuf[1];
+  end = &sbuf[slen - 1];
+  op = memchr (pos, '(', end - pos);
+  cp = memchr (pos, ')', end - pos);
+  while (NULL != cp)
+  {
+    /* count all '(' that come before the next ')' */
+    while ( (NULL != op) &&
+            (op < cp) )
+    {
+      cnt++;
+      pos = op + 1;
+      op = memchr (pos, '(', end - pos);
+    }
+    /* consume all ')' that come before the next '(' */
+    while ( (NULL != cp) &&
+            ( (NULL == op) ||
+              (cp < op) ) )
+    {
+      if (0 == cnt)
+        return; /* can't strip parens */
+      cnt--;
+      pos = cp + 1;
+      cp = memchr (pos, ')', end - pos);
+    }
+  }
+  if (0 != cnt)
+  {
+    /* unbalanced parentheses, should never happen */
+    GNUNET_break (0);
+    return;
+  }
+  str->sbuf++;
+  str->slen -= 2;
+}
- GNUNET_CONTAINER_DLL_remove (t->from_state->transitions_head,
- t->from_state->transitions_tail, t);
- t->from_state->transition_count--;
- GNUNET_CONTAINER_DLL_insert (t->to_state->transitions_head,
- t->to_state->transitions_tail, t);
- t->to_state->transition_count++;
- s_swp = t->from_state;
- t->from_state = t->to_state;
- t->to_state = s_swp;
+/**
+ * Check if the string 'str' starts with an epsilon (empty string).
+ * Example: "(|a)" is starting with an epsilon.
+ *
+ * @param str string to test
+ *
+ * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
+ */
+static int
+has_epsilon (const struct StringBuffer *str)
+{
+  /* The shortest possible match is "(|)", so anything shorter than three
+   * characters cannot match; the length check also keeps the accesses to
+   * sbuf[1] and sbuf[slen - 1] within the string. */
+  return
+    (GNUNET_YES != str->null_flag) &&
+    (3 <= str->slen) &&
+    ('(' == str->sbuf[0]) &&
+    ('|' == str->sbuf[1]) &&
+    (')' == str->sbuf[str->slen - 1]);
+}
+
+
+/**
+ * Remove an epsilon from the string str. Where epsilon is an empty string
+ * Example: str = "(|a|b|c)", result: "a|b|c"
+ * The result is a copy stored in 'ret'; release it with sb_free().
+ *
+ * @param str original string
+ * @param ret where to return string without preceding epsilon, a copy of 'str' if no
+ * preceding epsilon could be found, flagged as "null" if 'str' was "null"
+ */
+static void
+remove_epsilon (const struct StringBuffer *str,
+ struct StringBuffer *ret)
+{
+ if (GNUNET_YES == str->null_flag)
+ {
+ ret->null_flag = GNUNET_YES;
+ return;
+ }
+ if ( (str->slen > 1) &&
+ ('(' == str->sbuf[0]) &&
+ ('|' == str->sbuf[1]) &&
+ (')' == str->sbuf[str->slen - 1]) )
+ {
+ /* remove epsilon: drop the leading "(|" and the trailing ")" */
+ if (ret->blen < str->slen - 3)
+ {
+ GNUNET_array_grow (ret->abuf,
+ ret->blen,
+ str->slen - 3);
 }
+ /* 'ret' holds a valid string now, even if it was "null" before */
+ ret->null_flag = GNUNET_NO;
+ ret->sbuf = ret->abuf;
+ ret->slen = str->slen - 3;
+ memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
+ return;
 }
+ sb_strdup (ret, str);
 }
+
 /**
- * Create proof for the given state.
+ * Compare (up to) n bytes of 'str1' and 'str2'
 *
- * @param cls closure.
- * @param s state.
+ * @param str1 first string to compare
+ * @param str2 second string for comparison
+ * @param n number of bytes to compare
+ *
+ * @return -1 if the strings differ in length and one of them is shorter than
+ * 'n'; otherwise the memcmp() result over the first 'n' bytes (or over
+ * the whole strings if they are shorter than 'n'), i.e. 0 if equal
+ */
+static int
+sb_strncmp (const struct StringBuffer *str1,
+ const struct StringBuffer *str2, size_t n)
+{
+ size_t max;
+
+ if ( (str1->slen != str2->slen) &&
+ ( (str1->slen < n) ||
+ (str2->slen < n) ) )
+ return -1;
+ /* here either both lengths are equal, or both are at least 'n' */
+ max = GNUNET_MAX (str1->slen, str2->slen);
+ if (max > n)
+ max = n;
+ return memcmp (str1->sbuf, str2->sbuf, max);
+}
+
+
+/**
+ * Compare the first n bytes of a string buffer with a C string.
+ *
+ * @param str1 string buffer to compare
+ * @param str2 C string for comparison
+ * @param n number of bytes to compare (and length of str2)
+ *
+ * @return -1 if 'str1' is shorter than 'n', otherwise the memcmp() result
+ *         over the first 'n' bytes (0 if equal)
+ */
+static int
+sb_strncmp_cstr (const struct StringBuffer *str1,
+                 const char *str2, size_t n)
+{
+  return (n > str1->slen) ? -1 : memcmp (str1->sbuf, str2, n);
+}
+
+
+/**
+ * Initialize string buffer for storing strings of up to n
+ * characters. The buffer starts out as the empty (not "null") string.
+ *
+ * @param sb buffer to initialize, previous content is not released
+ * @param n desired target length, 0 to not allocate any storage yet
 */
 static void
-automaton_create_proofs_step (void *cls, struct GNUNET_REGEX_State *s)
+sb_init (struct StringBuffer *sb,
+ size_t n)
 {
- struct Transition *t;
- int i;
- char *tmp;
+ sb->null_flag = GNUNET_NO;
+ /* no allocation is made for n == 0 */
+ sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
+ sb->blen = n;
+ sb->slen = 0;
+}
+
+
+/**
+ * Compare the tail of 'str1' that starts at offset 'k' with all of 'str2'.
+ *
+ * @param str1 first string, compared from position 'k' to its end
+ * @param str2 second string, compared as a whole
+ * @param k starting position in 'str1'
+ *
+ * @return -1 if either string is "null", if 'k' is beyond the end of 'str1',
+ *         or if the tail of 'str1' and 'str2' differ in length; otherwise
+ *         the memcmp() result (0 if equal)
+ */
+static int
+sb_strkcmp (const struct StringBuffer *str1,
+            const struct StringBuffer *str2, size_t k)
+{
+  if (GNUNET_YES == str1->null_flag)
+    return -1;
+  if (GNUNET_YES == str2->null_flag)
+    return -1;
+  if (k > str1->slen)
+    return -1;
+  if (str1->slen - k != str2->slen)
+    return -1;
+  return memcmp (str1->sbuf + k, str2->sbuf, str2->slen);
+}
+
+
+/**
+ * Traversal action (see 'GNUNET_REGEX_automaton_traverse') that assigns
+ * the traversal counter to each visited state as its depth-first number
+ * and, if an array is given, records the state at that index.
+ *
+ * @param cls array of state pointers indexed by number, may be NULL
+ * @param count current state counter.
+ * @param s current state.
+ */
+static void
+number_states (void *cls, const unsigned int count,
+               struct GNUNET_REGEX_State *s)
+{
+  struct GNUNET_REGEX_State **by_id = cls;
+
+  s->dfs_id = count;
+  if (NULL == by_id)
+    return;
+  by_id[count] = s;
+}
+
+
+
+/**
+ * Expand a 'struct StringBuffer' lvalue into the two arguments (length,
+ * pointer) consumed by a "%.*s" format; prints "(null)" for "null" buffers.
+ * The argument is parenthesized so that expressions such as '*ptr' expand
+ * correctly; note that it is evaluated more than once.
+ */
+#define PRIS(a) \
+  ((GNUNET_YES == (a).null_flag) ? 6 : (int) (a).slen), \
+  ((GNUNET_YES == (a).null_flag) ? "(null)" : (a).sbuf)
+
- for (i = 0, t = s->transitions_head; NULL != t; t = t->next, i++)
+/**
+ * Construct the regular expression given the inductive step,
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
+ * R^{(k-1)}_{kj}$, and simplify the resulting expression saved in R_cur_ij.
+ *
+ * @param R_last_ij value of $R^{(k-1)}_{ij}$.
+ * @param R_last_ik value of $R^{(k-1)}_{ik}$.
+ * @param R_last_kk value of $R^{(k-1)}_{kk}$.
+ * @param R_last_kj value of $R^{(k-1)}_{kj}$.
+ * @param R_cur_ij result for this inductive step is saved in R_cur_ij; its
+ * previous content is overwritten, except in the 'synced' shortcut
+ * below, where the content already stored in R_cur_ij is kept
+ * @param R_cur_l optimization -- kept between iterations to avoid realloc
+ * @param R_cur_r optimization -- kept between iterations to avoid realloc
+ */
+static void
+automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
+ const struct StringBuffer *R_last_ik,
+ const struct StringBuffer *R_last_kk,
+ const struct StringBuffer *R_last_kj,
+ struct StringBuffer *R_cur_ij,
+ struct StringBuffer *R_cur_l,
+ struct StringBuffer *R_cur_r)
+{
+ struct StringBuffer R_temp_ij;
+ struct StringBuffer R_temp_ik;
+ struct StringBuffer R_temp_kj;
+ struct StringBuffer R_temp_kk;
+ int eps_check;
+ int ij_ik_cmp;
+ int ij_kj_cmp;
+ int ik_kk_cmp;
+ int kk_kj_cmp;
+ int clean_ik_kk_cmp;
+ int clean_kk_kj_cmp;
+ size_t length;
+ size_t length_l;
+ size_t length_r;
+
+ /*
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+ * R_last == R^{(k-1)}, R_cur == R^{(k)}
+ * R_cur_ij = R_cur_l | R_cur_r
+ * R_cur_l == R^{(k-1)}_{ij}
+ * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+ */
+
+ if ( (GNUNET_YES == R_last_ij->null_flag) &&
+ ( (GNUNET_YES == R_last_ik->null_flag) ||
+ (GNUNET_YES == R_last_kj->null_flag)))
 {
- if (t->to_state == s)
- GNUNET_asprintf (&tmp, "%c*", t->label);
- else if (i != s->transition_count - 1)
- GNUNET_asprintf (&tmp, "%c|", t->label);
+ /* R^{(k)}_{ij} = N | N */
+ R_cur_ij->null_flag = GNUNET_YES;
+ R_cur_ij->synced = GNUNET_NO;
+ return;
+ }
+
+ if ( (GNUNET_YES == R_last_ik->null_flag) ||
+ (GNUNET_YES == R_last_kj->null_flag) )
+ {
+ /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
+ if (GNUNET_YES == R_last_ij->synced)
+ {
+ /* shortcut: no copy is made, the content already stored in R_cur_ij
+ * is kept and only its flags are updated */
+ R_cur_ij->synced = GNUNET_YES;
+ R_cur_ij->null_flag = GNUNET_NO;
+ return;
+ }
+ R_cur_ij->synced = GNUNET_YES;
+ sb_strdup (R_cur_ij, R_last_ij);
+ return;
+ }
+ R_cur_ij->synced = GNUNET_NO;
+
+ /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
+
+ R_cur_r->null_flag = GNUNET_YES;
+ R_cur_r->slen = 0;
+ R_cur_l->null_flag = GNUNET_YES;
+ R_cur_l->slen = 0;
+
+ /* cache results from strcmp, we might need these many times */
+ ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
+ ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
+ ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
+ kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
+
+ /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
+ * as parentheses, so we can better compare the contents */
+
+ memset (&R_temp_ij, 0, sizeof (struct StringBuffer));
+ memset (&R_temp_ik, 0, sizeof (struct StringBuffer));
+ memset (&R_temp_kk, 0, sizeof (struct StringBuffer));
+ memset (&R_temp_kj, 0, sizeof (struct StringBuffer));
+ remove_epsilon (R_last_ik, &R_temp_ik);
+ remove_epsilon (R_last_kk, &R_temp_kk);
+ remove_epsilon (R_last_kj, &R_temp_kj);
+ remove_parentheses (&R_temp_ik);
+ remove_parentheses (&R_temp_kk);
+ remove_parentheses (&R_temp_kj);
+ clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
+ clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
+
+ /* construct R_cur_l (and, if necessary R_cur_r) */
+ if (GNUNET_YES != R_last_ij->null_flag)
+ {
+ /* Assign R_temp_ij to R_last_ij and remove epsilon as well
+ * as parentheses, so we can better compare the contents */
+ remove_epsilon (R_last_ij, &R_temp_ij);
+ remove_parentheses (&R_temp_ij);
+
+ if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
+ (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
+ (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
+ {
+ if (0 == R_temp_ij.slen)
+ {
+ R_cur_r->null_flag = GNUNET_NO;
+ }
+ else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
+ (0 == sb_strncmp_cstr (R_last_ik, "(|", 2) &&
+ 0 == sb_strncmp_cstr (R_last_kj, "(|", 2)))
+ {
+ /*
+ * a|(e|a)a*(e|a) = a*
+ * a|(e|a)(e|a)*(e|a) = a*
+ * (e|a)|aa*a = a*
+ * (e|a)|aa*(e|a) = a*
+ * (e|a)|(e|a)a*a = a*
+ * (e|a)|(e|a)a*(e|a) = a*
+ * (e|a)|(e|a)(e|a)*(e|a) = a*
+ */
+ if (GNUNET_YES == needs_parentheses (&R_temp_ij))
+ sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
+ else
+ sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
+ }
+ else
+ {
+ /*
+ * a|aa*a = a+
+ * a|(e|a)a*a = a+
+ * a|aa*(e|a) = a+
+ * a|(e|a)(e|a)*a = a+
+ * a|a(e|a)*(e|a) = a+
+ */
+ if (GNUNET_YES == needs_parentheses (&R_temp_ij))
+ sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
+ else
+ sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
+ }
+ }
+ else if ( (0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) && (0 != clean_ik_kk_cmp) )
+ {
+ /* a|ab*b = ab* */
+ if (0 == R_last_kk->slen)
+ sb_strdup (R_cur_r, R_last_ij);
+ else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else if ( (0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) && (0 != clean_kk_kj_cmp))
+ {
+ /* a|bb*a = b*a */
+ if (R_last_kk->slen < 1)
+ {
+ sb_strdup (R_cur_r, R_last_kj);
+ }
+ else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
+ else
+ sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
+
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else if ( (0 == ij_ik_cmp) && (0 == kk_kj_cmp) && (! has_epsilon (R_last_ij)) &&
+ has_epsilon (R_last_kk))
+ {
+ /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else if ( (0 == ij_kj_cmp) && (0 == ik_kk_cmp) && (! has_epsilon (R_last_ij)) &&
+ has_epsilon (R_last_kk))
+ {
+ /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
+ else
+ sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
 else
- GNUNET_asprintf (&tmp, "%c", t->label);
+ {
+ sb_strdup (R_cur_l, R_last_ij);
+ remove_parentheses (R_cur_l);
+ }
+ }
+ else
+ {
+ /* we have no left side */
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+
+ /* construct R_cur_r, if not already constructed */
+ if (GNUNET_YES == R_cur_r->null_flag)
+ {
+ /* NOTE: this unsigned subtraction wraps around if R_last_ik is longer than
+ * R_temp_kk; the sb_strkcmp() test below then rejects the huge offset */
+ length = R_temp_kk.slen - R_last_ik->slen;
+
+ /* a(ba)*bx = (ab)+x */
+ if ( (length > 0) &&
+ (GNUNET_YES != R_last_kk->null_flag) &&
+ (0 < R_last_kk->slen) &&
+ (GNUNET_YES != R_last_kj->null_flag) &&
+ (0 < R_last_kj->slen) &&
+ (GNUNET_YES != R_last_ik->null_flag) &&
+ (0 < R_last_ik->slen) &&
+ (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
+ (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)) )
+ {
+ struct StringBuffer temp_a;
+ struct StringBuffer temp_b;
+
+ sb_init (&temp_a, length);
+ sb_init (&temp_b, R_last_kj->slen - length);
+
+ length_l = length;
+ temp_a.sbuf = temp_a.abuf;
+ memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
+ temp_a.slen = length_l;
+
+ length_r = R_last_kj->slen - length;
+ temp_b.sbuf = temp_b.abuf;
+ memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
+ temp_b.slen = length_r;
+
+ /* e|(ab)+ = (ab)* */
+ if ( (GNUNET_YES != R_cur_l->null_flag) &&
+ (0 == R_cur_l->slen) &&
+ (0 == temp_b.slen) )
+ {
+ sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
+ sb_free (R_cur_l);
+ R_cur_l->null_flag = GNUNET_YES;
+ }
+ else
+ {
+ sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
+ }
+ sb_free (&temp_a);
+ sb_free (&temp_b);
+ }
+ else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk) &&
+ 0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
+ {
+ /*
+ * (e|a)a*(e|a) = a*
+ * (e|a)(e|a)*(e|a) = a*
+ */
+ if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
+ else
+ sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
+ }
+ /* aa*a = a+a */
+ else if ( (0 == clean_ik_kk_cmp) &&
+ (0 == clean_kk_kj_cmp) &&
+ (! has_epsilon (R_last_ik)) )
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
+ }
+ /*
+ * (e|a)a*a = a+
+ * aa*(e|a) = a+
+ * a(e|a)*(e|a) = a+
+ * (e|a)a*a = a+
+ */
+ else
+ {
+ eps_check =
+ (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
+ has_epsilon (R_last_kj));
- if (NULL != s->proof)
- s->proof =
- GNUNET_realloc (s->proof, strlen (s->proof) + strlen (tmp) + 1);
+ if (1 == eps_check)
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
+ else
+ sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
+ }
+ }
+ }
+ /*
+ * aa*b = a+b
+ * (e|a)(e|a)*b = a*b
+ */
+ else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
+ {
+ if (has_epsilon (R_last_ik))
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
+ else
+ sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
+ }
+ else
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
+ else
+ sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
+ }
+ }
+ /*
+ * ba*a = ba+
+ * b(e|a)*(e|a) = ba*
+ */
+ else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
+ {
+ if (has_epsilon (R_last_kj))
+ {
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
+ }
+ else
+ {
+ /* the repetition applies to the second argument (kk), not to the
+ * prefix R_last_ik: "ba+", not "b+a" */
+ if (needs_parentheses (&R_temp_kk))
+ sb_printf2 (R_cur_r, "%.*s(%.*s)+", 3, R_last_ik, &R_temp_kk);
+ else
+ sb_printf2 (R_cur_r, "%.*s%.*s+", 1, R_last_ik, &R_temp_kk);
+ }
+ }
 else
- s->proof = GNUNET_malloc (strlen (tmp) + 1);
- strcat (s->proof, tmp);
- GNUNET_free (tmp);
+ {
+ if (0 < R_temp_kk.slen)
+ {
+ if (needs_parentheses (&R_temp_kk))
+ {
+ sb_printf3 (R_cur_r, "%.*s(%.*s)*%.*s", 3, R_last_ik, &R_temp_kk,
+ R_last_kj);
+ }
+ else
+ {
+ sb_printf3 (R_cur_r, "%.*s%.*s*%.*s", 1, R_last_ik, &R_temp_kk,
+ R_last_kj);
+ }
+ }
+ else
+ {
+ sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
+ }
+ }
+ }
+ sb_free (&R_temp_ij);
+ sb_free (&R_temp_ik);
+ sb_free (&R_temp_kk);
+ sb_free (&R_temp_kj);
+
+ if ( (GNUNET_YES == R_cur_l->null_flag) &&
+ (GNUNET_YES == R_cur_r->null_flag) )
+ {
+ R_cur_ij->null_flag = GNUNET_YES;
+ return;
+ }
+
+ /* Only one side exists (or both sides are equal): swap buffers with the
+ * result instead of copying; the old storage of R_cur_ij is kept in the
+ * scratch buffer for reuse. */
+ if ( (GNUNET_YES != R_cur_l->null_flag) &&
+ (GNUNET_YES == R_cur_r->null_flag) )
+ {
+ struct StringBuffer tmp;
+
+ tmp = *R_cur_ij;
+ *R_cur_ij = *R_cur_l;
+ *R_cur_l = tmp;
+ return;
 }
+
+ if ( (GNUNET_YES == R_cur_l->null_flag) &&
+ (GNUNET_YES != R_cur_r->null_flag) )
+ {
+ struct StringBuffer tmp;
+
+ tmp = *R_cur_ij;
+ *R_cur_ij = *R_cur_r;
+ *R_cur_r = tmp;
+ return;
+ }
+
+ if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
+ {
+ struct StringBuffer tmp;
+
+ tmp = *R_cur_ij;
+ *R_cur_ij = *R_cur_l;
+ *R_cur_l = tmp;
+ return;
+ }
+ sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
 }
+
/**
- * Create proofs for all states in the given automaton.
+ * Create proofs for all states in the given automaton. Implementation of the
+ * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
+ * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
*
- * @param a automaton.
+ * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the
+ * proof) fields. The starting state will only have a valid proof/hash if it has
+ * any incoming transitions.
+ *
+ * @param a automaton for which to assign proofs and hashes, must not be NULL
*/
-static void
+static int
automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
{
- struct GNUNET_REGEX_State *s;
+ unsigned int n = a->state_count;
+ struct GNUNET_REGEX_State *states[n];
+ struct StringBuffer *R_last;
+ struct StringBuffer *R_cur;
+ struct StringBuffer R_cur_r;
+ struct StringBuffer R_cur_l;
+ struct StringBuffer *R_swap;
+ struct GNUNET_REGEX_Transition *t;
+ struct StringBuffer complete_regex;
+ unsigned int i;
+ unsigned int j;
+ unsigned int k;
+
+ R_last = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
+ R_cur = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
+ if ( (NULL == R_last) ||
+ (NULL == R_cur) )
+ {
+ GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
+ GNUNET_free_non_null (R_cur);
+ GNUNET_free_non_null (R_last);
+ return GNUNET_SYSERR;
+ }
- automaton_reverse (a);
+ /* create depth-first numbering of the states, initializes 'state' */
+ GNUNET_REGEX_automaton_traverse (a, a->start, NULL, NULL, &number_states,
+ states);
- for (s = a->states_head; NULL != s; s = s->next)
- automaton_create_proofs_step (NULL, s);
+ for (i = 0; i < n; i++)
+ GNUNET_assert (NULL != states[i]);
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ R_last[i *n + j].null_flag = GNUNET_YES;
- automaton_reverse (a);
+ /* Compute regular expressions of length "1" between each pair of states */
+ for (i = 0; i < n; i++)
+ {
+ for (t = states[i]->transitions_head; NULL != t; t = t->next)
+ {
+ j = t->to_state->dfs_id;
+ if (GNUNET_YES == R_last[i * n + j].null_flag)
+ {
+ sb_strdup_cstr (&R_last[i * n + j], t->label);
+ }
+ else
+ {
+ sb_append_cstr (&R_last[i * n + j], "|");
+ sb_append_cstr (&R_last[i * n + j], t->label);
+ }
+ }
+ /* add self-loop: i is reachable from i via epsilon-transition */
+ if (GNUNET_YES == R_last[i * n + i].null_flag)
+ {
+ R_last[i * n + i].slen = 0;
+ R_last[i * n + i].null_flag = GNUNET_NO;
+ }
+ else
+ {
+ sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
+ }
+ }
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ if (needs_parentheses (&R_last[i * n + j]))
+ sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
+ /* Compute regular expressions of length "k" between each pair of states per
+ * induction */
+ memset (&R_cur_l, 0, sizeof (struct StringBuffer));
+ memset (&R_cur_r, 0, sizeof (struct StringBuffer));
+ for (k = 0; k < n; k++)
+ {
+ for (i = 0; i < n; i++)
+ {
+ for (j = 0; j < n; j++)
+ {
+ /* Basis for the recursion:
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+ * R_last == R^{(k-1)}, R_cur == R^{(k)}
+ */
+
+ /* Create R_cur[i][j] and simplify the expression */
+ automaton_create_proofs_simplify (&R_last[i * n + j], &R_last[i * n + k],
+ &R_last[k * n + k], &R_last[k * n + j],
+ &R_cur[i * n + j],
+ &R_cur_l, &R_cur_r);
+ }
+ }
+ /* set R_last = R_cur */
+ R_swap = R_last;
+ R_last = R_cur;
+ R_cur = R_swap;
+ /* clear 'R_cur' for next iteration */
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ R_cur[i * n + j].null_flag = GNUNET_YES;
+ }
+ sb_free (&R_cur_l);
+ sb_free (&R_cur_r);
+ /* assign proofs and hashes */
+ for (i = 0; i < n; i++)
+ {
+ if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
+ {
+ states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
+ R_last[a->start->dfs_id * n + i].slen);
+ GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof),
+ &states[i]->hash);
+ }
+ }
+
+ /* complete regex for whole DFA: union of all pairs (start state/accepting
+ * state(s)). */
+ sb_init (&complete_regex, 16 * n);
+ for (i = 0; i < n; i++)
+ {
+ if (states[i]->accepting)
+ {
+ if ( (0 == complete_regex.slen) &&
+ (0 < R_last[a->start->dfs_id * n + i].slen) )
+ {
+ sb_append (&complete_regex,
+ &R_last[a->start->dfs_id * n + i]);
+ }
+ else if ( (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
+ (0 < R_last[a->start->dfs_id * n + i].slen) )
+ {
+ sb_append_cstr (&complete_regex, "|");
+ sb_append (&complete_regex,
+ &R_last[a->start->dfs_id * n + i]);
+ }
+ }
+ }
+ a->canonical_regex = GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
+
+ /* cleanup */
+ sb_free (&complete_regex);
+ for (i = 0; i < n; i++)
+ for (j = 0; j < n; j++)
+ {
+ sb_free (&R_cur[i * n + j]);
+ sb_free (&R_last[i * n + j]);
+ }
+ GNUNET_free (R_cur);
+ GNUNET_free (R_last);
+ return GNUNET_OK;
}
+
/**
* Creates a new DFA state based on a set of NFA states. Needs to be freed using
* automaton_destroy_state.
@@ -911,24 +1767,16 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
struct GNUNET_REGEX_StateSet *nfa_states)
{
struct GNUNET_REGEX_State *s;
- char *name;
- int len = 0;
+ char *pos;
+ size_t len;
struct GNUNET_REGEX_State *cstate;
- struct Transition *ctran;
- int insert = 1;
- struct Transition *t;
- int i;
+ struct GNUNET_REGEX_Transition *ctran;
+ unsigned int i;
s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
s->id = ctx->state_id++;
- s->accepting = 0;
- s->marked = 0;
- s->name = NULL;
- s->scc_id = 0;
s->index = -1;
s->lowlink = -1;
- s->contained = 0;
- s->proof = NULL;
if (NULL == nfa_states)
{
@@ -936,93 +1784,103 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
return s;
}
- s->nfa_set = nfa_states;
+ s->nfa_set = *nfa_states;
- if (nfa_states->len < 1)
+ if (nfa_states->off < 1)
return s;
- // Create a name based on 'sset'
- s->name = GNUNET_malloc (sizeof (char) * 2);
+ /* Create a name based on 'nfa_states' */
+ len = nfa_states->off * 14 + 4;
+ s->name = GNUNET_malloc (len);
strcat (s->name, "{");
- name = NULL;
+ pos = s->name + 1;
- for (i = 0; i < nfa_states->len; i++)
+ for (i = 0; i < nfa_states->off; i++)
{
cstate = nfa_states->states[i];
- GNUNET_asprintf (&name, "%i,", cstate->id);
-
- if (NULL != name)
- {
- len = strlen (s->name) + strlen (name) + 1;
- s->name = GNUNET_realloc (s->name, len);
- strcat (s->name, name);
- GNUNET_free (name);
- name = NULL;
- }
-
- // Add a transition for each distinct label to NULL state
- for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
- {
- if (0 != ctran->label)
- {
- insert = 1;
-
- for (t = s->transitions_head; NULL != t; t = t->next)
- {
- if (t->label == ctran->label)
- {
- insert = 0;
- break;
- }
- }
+ GNUNET_snprintf (pos, pos - s->name + len,
+ "%i,", cstate->id);
+ pos += strlen (pos);
- if (insert)
- state_add_transition (ctx, s, ctran->label, NULL);
- }
- }
+ /* Add a transition for each distinct label to NULL state */
+ for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
+ if (NULL != ctran->label)
+ state_add_transition (ctx, s, ctran->label, NULL);
- // If the nfa_states contain an accepting state, the new dfa state is also
- // accepting
+ /* If the nfa_states contain an accepting state, the new dfa state is also
+ * accepting. */
if (cstate->accepting)
s->accepting = 1;
- }
-
- s->name[strlen (s->name) - 1] = '}';
+ }
+ pos[-1] = '}';
+ s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
+ memset (nfa_states, 0, sizeof (struct GNUNET_REGEX_StateSet));
return s;
}
+
/**
- * Move from the given state 's' to the next state on transition 'label'
+ * Move from the given state 's' to the next state on transition 'str'. Consumes
+ * as much of the given 'str' as possible (usefull for strided DFAs). On return
+ * 's' will point to the next state, and the length of the substring used for
+ * this transition will be returned. If no transition possible 0 is returned and
+ * 's' points to NULL.
*
- * @param s starting state
- * @param label edge label to follow
+ * @param s starting state, will point to the next state or NULL (if no
+ * transition possible)
+ * @param str edge label to follow (will match longest common prefix)
*
- * @return new state or NULL, if transition on label not possible
+ * @return length of the substring comsumed from 'str'
*/
-static struct GNUNET_REGEX_State *
-dfa_move (struct GNUNET_REGEX_State *s, const char label)
+static unsigned int
+dfa_move (struct GNUNET_REGEX_State **s, const char *str)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
struct GNUNET_REGEX_State *new_s;
+ unsigned int len;
+ unsigned int max_len;
if (NULL == s)
- return NULL;
+ return 0;
new_s = NULL;
-
- for (t = s->transitions_head; NULL != t; t = t->next)
+ max_len = 0;
+ for (t = (*s)->transitions_head; NULL != t; t = t->next)
{
- if (label == t->label)
+ len = strlen (t->label);
+
+ if (0 == strncmp (t->label, str, len))
{
- new_s = t->to_state;
- break;
+ if (len >= max_len)
+ {
+ max_len = len;
+ new_s = t->to_state;
+ }
}
}
- return new_s;
+ *s = new_s;
+ return max_len;
+}
+
+
+/**
+ * Traversal action that flags the visited state as marked. Used by
+ * 'dfa_remove_unreachable_states' to find the states that can be reached
+ * from the start state of the automaton.
+ *
+ * @param cls closure, not used.
+ * @param count count, not used.
+ * @param s state whose 'marked' attribute is set to GNUNET_YES.
+ */
+static void
+mark_states (void *cls, const unsigned int count, struct GNUNET_REGEX_State *s)
+{
+  (void) cls;
+  (void) count;
+  s->marked = GNUNET_YES;
+}
+
/**
* Remove all unreachable states from DFA 'a'. Unreachable states are those
* states that are not reachable from the starting state.
@@ -1035,14 +1893,14 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
struct GNUNET_REGEX_State *s;
struct GNUNET_REGEX_State *s_next;
- // 1. unmark all states
+ /* 1. unmark all states */
for (s = a->states_head; NULL != s; s = s->next)
s->marked = GNUNET_NO;
- // 2. traverse dfa from start state and mark all visited states
- automaton_traverse (NULL, a, NULL);
+ /* 2. traverse dfa from start state and mark all visited states */
+ GNUNET_REGEX_automaton_traverse (a, a->start, NULL, NULL, &mark_states, NULL);
- // 3. delete all states that were not visited
+ /* 3. delete all states that were not visited */
for (s = a->states_head; NULL != s; s = s_next)
{
s_next = s->next;
@@ -1051,9 +1909,10 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
}
}
+
/**
* Remove all dead states from the DFA 'a'. Dead states are those states that do
- * not transition to any other state but themselfes.
+ * not transition to any other state but themselves.
*
* @param a DFA automaton
*/
@@ -1061,13 +1920,16 @@ static void
dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
{
struct GNUNET_REGEX_State *s;
- struct Transition *t;
+ struct GNUNET_REGEX_State *s_next;
+ struct GNUNET_REGEX_Transition *t;
int dead;
GNUNET_assert (DFA == a->type);
- for (s = a->states_head; NULL != s; s = s->next)
+ for (s = a->states_head; NULL != s; s = s_next)
{
+ s_next = s->next;
+
if (s->accepting)
continue;
@@ -1084,54 +1946,66 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
if (0 == dead)
continue;
- // state s is dead, remove it
+ /* state s is dead, remove it */
automaton_remove_state (a, s);
}
}
+
/**
* Merge all non distinguishable states in the DFA 'a'
*
* @param ctx context
* @param a DFA automaton
+ * @return GNUNET_OK on success
*/
-static void
+static int
dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
struct GNUNET_REGEX_Automaton *a)
{
- int i;
- int table[a->state_count][a->state_count];
+ uint32_t *table;
struct GNUNET_REGEX_State *s1;
struct GNUNET_REGEX_State *s2;
- struct Transition *t1;
- struct Transition *t2;
+ struct GNUNET_REGEX_Transition *t1;
+ struct GNUNET_REGEX_Transition *t2;
struct GNUNET_REGEX_State *s1_next;
struct GNUNET_REGEX_State *s2_next;
int change;
- int num_equal_edges;
+ unsigned int num_equal_edges;
+ unsigned int i;
+ unsigned int state_cnt;
+ unsigned long long idx;
+ unsigned long long idx1;
- for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
- i++, s1 = s1->next)
+ if ( (NULL == a) || (0 == a->state_count) )
{
- s1->marked = i;
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not merge nondistinguishable states, automaton was NULL.\n");
+ return GNUNET_SYSERR;
}
- // Mark all pairs of accepting/!accepting states
- for (s1 = a->states_head; NULL != s1; s1 = s1->next)
+ state_cnt = a->state_count;
+ table = GNUNET_malloc_large ((sizeof (uint32_t) * state_cnt * state_cnt / 32) + sizeof (uint32_t));
+ if (NULL == table)
{
- for (s2 = a->states_head; NULL != s2; s2 = s2->next)
- {
- table[s1->marked][s2->marked] = 0;
+ GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
+ return GNUNET_SYSERR;
+ }
+
+ for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
+ s1->marked = i++;
- if ((s1->accepting && !s2->accepting) ||
- (!s1->accepting && s2->accepting))
+ /* Mark all pairs of accepting/!accepting states */
+ for (s1 = a->states_head; NULL != s1; s1 = s1->next)
+ for (s2 = a->states_head; NULL != s2; s2 = s2->next)
+ if ( (s1->accepting && !s2->accepting) ||
+ (!s1->accepting && s2->accepting) )
{
- table[s1->marked][s2->marked] = 1;
+ idx = s1->marked * state_cnt + s2->marked;
+ table[idx / 32] |= (1 << (idx % 32));
}
- }
- }
- // Find all equal states
+ /* Find all equal states */
change = 1;
while (0 != change)
{
@@ -1140,75 +2014,351 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
{
for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
{
- if (0 != table[s1->marked][s2->marked])
+ idx = s1->marked * state_cnt + s2->marked;
+ if (0 != (table[idx / 32] & (1 << (idx % 32))))
continue;
-
num_equal_edges = 0;
for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
{
for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
{
- if (t1->label == t2->label)
- {
- num_equal_edges++;
- if (0 != table[t1->to_state->marked][t2->to_state->marked] ||
- 0 != table[t2->to_state->marked][t1->to_state->marked])
- {
- table[s1->marked][s2->marked] = t1->label != 0 ? t1->label : 1;
- change = 1;
- }
- }
- }
+ if (0 == strcmp (t1->label, t2->label))
+ {
+ num_equal_edges++;
+ /* same edge, but targets definitively different, so we're different
+ as well */
+ if (t1->to_state->marked > t2->to_state->marked)
+ idx1 = t1->to_state->marked * state_cnt + t2->to_state->marked;
+ else
+ idx1 = t2->to_state->marked * state_cnt + t1->to_state->marked;
+ if (0 != (table[idx1 / 32] & (1 << (idx1 % 32))))
+ {
+ table[idx / 32] |= (1 << (idx % 32));
+ change = 1; /* changed a marker, need to run again */
+ }
+ }
+ }
}
- if (num_equal_edges != s1->transition_count ||
- num_equal_edges != s2->transition_count)
+ if ( (num_equal_edges != s1->transition_count) ||
+ (num_equal_edges != s2->transition_count) )
{
- // Make sure ALL edges of possible equal states are the same
- table[s1->marked][s2->marked] = -2;
+ /* Make sure ALL edges of possible equal states are the same */
+ table[idx / 32] |= (1 << (idx % 32));
+ change = 1; /* changed a marker, need to run again */
}
}
}
}
- // Merge states that are equal
+ /* Merge states that are equal */
for (s1 = a->states_head; NULL != s1; s1 = s1_next)
{
s1_next = s1->next;
for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
{
s2_next = s2->next;
- if (table[s1->marked][s2->marked] == 0)
+ idx = s1->marked * state_cnt + s2->marked;
+ if (0 == (table[idx / 32] & (1 << (idx % 32))))
automaton_merge_states (ctx, a, s1, s2);
}
}
+
+ GNUNET_free (table);
+ return GNUNET_OK;
}
+
/**
* Minimize the given DFA 'a' by removing all unreachable states, removing all
* dead states and merging all non distinguishable states
*
* @param ctx context
* @param a DFA automaton
+ * @return GNUNET_OK on success
*/
-static void
+static int
dfa_minimize (struct GNUNET_REGEX_Context *ctx,
struct GNUNET_REGEX_Automaton *a)
{
if (NULL == a)
- return;
+ return GNUNET_SYSERR;
GNUNET_assert (DFA == a->type);
- // 1. remove unreachable states
+ /* 1. remove unreachable states */
dfa_remove_unreachable_states (a);
- // 2. remove dead states
+ /* 2. remove dead states */
dfa_remove_dead_states (a);
- // 3. Merge nondistinguishable states
- dfa_merge_nondistinguishable_states (ctx, a);
+ /* 3. Merge nondistinguishable states */
+ if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a))
+ return GNUNET_SYSERR;
+ return GNUNET_OK;
}
+
+/**
+ * Context for adding strided transitions to a DFA.
+ */
+struct GNUNET_REGEX_Strided_Context
+{
+ /**
+ * Length of the strides.
+ */
+ const unsigned int stride;
+
+ /**
+ * Strided transitions DLL. New strided transitions will be stored in this DLL
+ * and afterwards added to the DFA.
+ */
+ struct GNUNET_REGEX_Transition *transitions_head;
+
+ /**
+ * Strided transitions DLL.
+ */
+ struct GNUNET_REGEX_Transition *transitions_tail;
+};
+
+
+/**
+ * Recursive helper function to add strides to a DFA.
+ *
+ * @param cls context, contains stride length and strided transitions DLL.
+ * @param depth current depth of the depth-first traversal of the graph.
+ * @param label current label, string that contains all labels on the path from
+ * 'start' to 's'.
+ * @param start start state for the depth-first traversal of the graph.
+ * @param s current state in the depth-first traversal
+ */
+void
+dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label,
+ struct GNUNET_REGEX_State *start,
+ struct GNUNET_REGEX_State *s)
+{
+ struct GNUNET_REGEX_Strided_Context *ctx = cls;
+ struct GNUNET_REGEX_Transition *t;
+ char *new_label;
+
+ if (depth == ctx->stride)
+ {
+ t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
+ t->label = GNUNET_strdup (label);
+ t->to_state = s;
+ t->from_state = start;
+ GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail,
+ t);
+ }
+ else
+ {
+ for (t = s->transitions_head; NULL != t; t = t->next)
+ {
+ /* Do not consider self-loops, because they result in too many
+ * transitions */
+ if (t->to_state == t->from_state)
+ continue;
+
+ if (NULL != label)
+ {
+ GNUNET_asprintf (&new_label, "%s%s", label, t->label);
+ }
+ else
+ new_label = GNUNET_strdup (t->label);
+
+ dfa_add_multi_strides_helper (cls, (depth + 1), new_label, start,
+ t->to_state);
+ }
+ }
+ GNUNET_free_non_null (label);
+}
+
+
+/**
+ * Function called for each state in the DFA. Starts a traversal of depth set in
+ * context starting from state 's'.
+ *
+ * @param cls context.
+ * @param count not used.
+ * @param s current state.
+ */
+void
+dfa_add_multi_strides (void *cls, const unsigned int count,
+ struct GNUNET_REGEX_State *s)
+{
+ dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
+}
+
+
+/**
+ * Adds multi-strided transitions to the given 'dfa'.
+ *
+ * @param regex_ctx regex context needed to add transitions to the automaton.
+ * @param dfa DFA to which the multi strided transitions should be added.
+ * @param stride_len length of the strides.
+ */
+void
+GNUNET_REGEX_dfa_add_multi_strides (struct GNUNET_REGEX_Context *regex_ctx,
+ struct GNUNET_REGEX_Automaton *dfa,
+ const unsigned int stride_len)
+{
+ struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL };
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *t_next;
+
+ if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
+ return;
+
+ /* Compute the new transitions of given stride_len */
+ GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL,
+ &dfa_add_multi_strides, &ctx);
+
+ /* Add all the new transitions to the automaton. */
+ for (t = ctx.transitions_head; NULL != t; t = t_next)
+ {
+ t_next = t->next;
+ state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
+ GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
+ GNUNET_free_non_null (t->label);
+ GNUNET_free (t);
+ }
+
+ /* Mark this automaton as multistrided */
+ dfa->is_multistrided = GNUNET_YES;
+}
+
+/**
+ * Recursive Helper function for DFA path compression. Does DFS on the DFA graph
+ * and adds new transitions to the given transitions DLL and marks states that
+ * should be removed by setting state->contained to GNUNET_YES.
+ *
+ * @param dfa DFA for which the paths should be compressed.
+ * @param start starting state for linear path search.
+ * @param cur current state in the recursive DFS.
+ * @param label current label (string of traversed labels).
+ * @param max_len maximal path compression length.
+ * @param transitions_head transitions DLL.
+ * @param transitions_tail transitions DLL.
+ */
+void
+dfa_compress_paths_helper (struct GNUNET_REGEX_Automaton *dfa,
+ struct GNUNET_REGEX_State *start,
+ struct GNUNET_REGEX_State *cur, char *label,
+ unsigned int max_len,
+ struct GNUNET_REGEX_Transition **transitions_head,
+ struct GNUNET_REGEX_Transition **transitions_tail)
+{
+ struct GNUNET_REGEX_Transition *t;
+ char *new_label;
+
+
+ if (NULL != label &&
+ ((cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
+ GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 &&
+ max_len == strlen (label)) ||
+ (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
+ {
+ t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
+ t->label = GNUNET_strdup (label);
+ t->to_state = cur;
+ t->from_state = start;
+ GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
+
+ if (GNUNET_NO == cur->marked)
+ {
+ dfa_compress_paths_helper (dfa, cur, cur, NULL, max_len, transitions_head,
+ transitions_tail);
+ }
+ return;
+ }
+ else if (cur != start)
+ cur->contained = GNUNET_YES;
+
+ if (GNUNET_YES == cur->marked && cur != start)
+ return;
+
+ cur->marked = GNUNET_YES;
+
+
+ for (t = cur->transitions_head; NULL != t; t = t->next)
+ {
+ if (NULL != label)
+ GNUNET_asprintf (&new_label, "%s%s", label, t->label);
+ else
+ new_label = GNUNET_strdup (t->label);
+
+ if (t->to_state != cur)
+ {
+ dfa_compress_paths_helper (dfa, start, t->to_state, new_label, max_len,
+ transitions_head, transitions_tail);
+ }
+ GNUNET_free (new_label);
+ }
+}
+
+
+/**
+ * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
+ * compressed to 0->3 by combining transitions.
+ *
+ * @param regex_ctx context for adding new transitions.
+ * @param dfa DFA representation, will directly modify the given DFA.
+ * @param max_len maximal length of the compressed paths.
+ */
+static void
+dfa_compress_paths (struct GNUNET_REGEX_Context *regex_ctx,
+ struct GNUNET_REGEX_Automaton *dfa, unsigned int max_len)
+{
+ struct GNUNET_REGEX_State *s;
+ struct GNUNET_REGEX_State *s_next;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *t_next;
+ struct GNUNET_REGEX_Transition *transitions_head = NULL;
+ struct GNUNET_REGEX_Transition *transitions_tail = NULL;
+
+ if (NULL == dfa)
+ return;
+
+ /* Count the incoming transitions on each state. */
+ for (s = dfa->states_head; NULL != s; s = s->next)
+ {
+ for (t = s->transitions_head; NULL != t; t = t->next)
+ {
+ if (NULL != t->to_state)
+ t->to_state->incoming_transition_count++;
+ }
+ }
+
+ /* Unmark all states. */
+ for (s = dfa->states_head; NULL != s; s = s->next)
+ {
+ s->marked = GNUNET_NO;
+ s->contained = GNUNET_NO;
+ }
+
+ /* Add strides and mark states that can be deleted. */
+ dfa_compress_paths_helper (dfa, dfa->start, dfa->start, NULL, max_len,
+ &transitions_head, &transitions_tail);
+
+ /* Add all the new transitions to the automaton. */
+ for (t = transitions_head; NULL != t; t = t_next)
+ {
+ t_next = t->next;
+ state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
+ GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
+ GNUNET_free_non_null (t->label);
+ GNUNET_free (t);
+ }
+
+ /* Remove marked states (including their incoming and outgoing transitions). */
+ for (s = dfa->states_head; NULL != s; s = s_next)
+ {
+ s_next = s->next;
+ if (GNUNET_YES == s->contained)
+ automaton_remove_state (dfa, s);
+ }
+}
+
+
/**
* Creates a new NFA fragment. Needs to be cleared using
* automaton_fragment_clear.
@@ -1229,19 +2379,23 @@ nfa_fragment_create (struct GNUNET_REGEX_State *start,
n->type = NFA;
n->start = NULL;
n->end = NULL;
+ n->state_count = 0;
- if (NULL == start && NULL == end)
+ if (NULL == start || NULL == end)
return n;
automaton_add_state (n, end);
automaton_add_state (n, start);
+ n->state_count = 2;
+
n->start = start;
n->end = end;
return n;
}
+
/**
* Adds a list of states to the given automaton 'n'.
*
@@ -1279,6 +2433,7 @@ nfa_add_states (struct GNUNET_REGEX_Automaton *n,
n->state_count++;
}
+
/**
* Creates a new NFA state. Needs to be freed using automaton_destroy_state.
*
@@ -1295,7 +2450,7 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
s->id = ctx->state_id++;
s->accepting = accepting;
- s->marked = 0;
+ s->marked = GNUNET_NO;
s->contained = 0;
s->index = -1;
s->lowlink = -1;
@@ -1306,127 +2461,77 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
return s;
}
-/**
- * Calculates the NFA closure set for the given state.
- *
- * @param nfa the NFA containing 's'
- * @param s starting point state
- * @param label transitioning label on which to base the closure on,
- * pass 0 for epsilon transition
- *
- * @return sorted nfa closure on 'label' (epsilon closure if 'label' is 0)
- */
-static struct GNUNET_REGEX_StateSet *
-nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa,
- struct GNUNET_REGEX_State *s, const char label)
-{
- struct GNUNET_REGEX_StateSet *cls;
- struct GNUNET_REGEX_StateSet *cls_check;
- struct GNUNET_REGEX_State *clsstate;
- struct GNUNET_REGEX_State *currentstate;
- struct Transition *ctran;
-
- if (NULL == s)
- return NULL;
-
- cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
- cls_check = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
-
- for (clsstate = nfa->states_head; NULL != clsstate; clsstate = clsstate->next)
- clsstate->contained = 0;
-
- // Add start state to closure only for epsilon closure
- if (0 == label)
- GNUNET_array_append (cls->states, cls->len, s);
-
- GNUNET_array_append (cls_check->states, cls_check->len, s);
- while (cls_check->len > 0)
- {
- currentstate = cls_check->states[cls_check->len - 1];
- GNUNET_array_grow (cls_check->states, cls_check->len, cls_check->len - 1);
-
- for (ctran = currentstate->transitions_head; NULL != ctran;
- ctran = ctran->next)
- {
- if (NULL != ctran->to_state && label == ctran->label)
- {
- clsstate = ctran->to_state;
-
- if (NULL != clsstate && 0 == clsstate->contained)
- {
- GNUNET_array_append (cls->states, cls->len, clsstate);
- GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
- clsstate->contained = 1;
- }
- }
- }
- }
- GNUNET_assert (0 == cls_check->len);
- GNUNET_free (cls_check);
-
- if (cls->len > 1)
- qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
- state_compare);
-
- return cls;
-}
/**
* Calculates the closure set for the given set of states.
*
+ * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
* @param nfa the NFA containing 's'
* @param states list of states on which to base the closure on
* @param label transitioning label for which to base the closure on,
- * pass 0 for epsilon transition
- *
- * @return sorted nfa closure on 'label' (epsilon closure if 'label' is 0)
+ * pass NULL for epsilon transition
*/
-static struct GNUNET_REGEX_StateSet *
-nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa,
- struct GNUNET_REGEX_StateSet *states, const char label)
+static void
+nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret,
+ struct GNUNET_REGEX_Automaton *nfa,
+ struct GNUNET_REGEX_StateSet *states, const char *label)
{
struct GNUNET_REGEX_State *s;
- struct GNUNET_REGEX_StateSet *sset;
- struct GNUNET_REGEX_StateSet *cls;
- int i;
- int j;
- int k;
- int contains;
+ unsigned int i;
+ struct GNUNET_REGEX_StateSet_MDLL cls_stack;
+ struct GNUNET_REGEX_State *clsstate;
+ struct GNUNET_REGEX_State *currentstate;
+ struct GNUNET_REGEX_Transition *ctran;
+ memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet));
if (NULL == states)
- return NULL;
-
- cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
+ return;
- for (i = 0; i < states->len; i++)
+ for (i = 0; i < states->off; i++)
{
s = states->states[i];
- sset = nfa_closure_create (nfa, s, label);
- for (j = 0; j < sset->len; j++)
+ /* Add start state to closure only for epsilon closure */
+ if (NULL == label)
+ state_set_append (ret, s);
+
+ /* initialize work stack */
+ cls_stack.head = NULL;
+ cls_stack.tail = NULL;
+ GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
+ cls_stack.len = 1;
+
+ while (NULL != (currentstate = cls_stack.tail))
{
- contains = 0;
- for (k = 0; k < cls->len; k++)
+ GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
+ currentstate);
+ cls_stack.len--;
+ for (ctran = currentstate->transitions_head; NULL != ctran;
+ ctran = ctran->next)
{
- if (sset->states[j]->id == cls->states[k]->id)
- {
- contains = 1;
- break;
- }
- }
- if (!contains)
- GNUNET_array_append (cls->states, cls->len, sset->states[j]);
+ if (NULL == (clsstate = ctran->to_state))
+ continue;
+ if (0 != clsstate->contained)
+ continue;
+ if (0 != nullstrcmp (label, ctran->label))
+ continue;
+ state_set_append (ret, clsstate);
+ GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail,
+ clsstate);
+ cls_stack.len++;
+ clsstate->contained = 1;
+ }
}
- state_set_clear (sset);
}
+ for (i = 0; i < ret->off; i++)
+ ret->states[i]->contained = 0;
- if (cls->len > 1)
- qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
- state_compare);
-
- return cls;
+ if (ret->off > 1)
+ qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *),
+ &state_compare);
}
+
/**
* Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
*
@@ -1437,28 +2542,32 @@ nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
struct GNUNET_REGEX_Automaton *b;
- struct GNUNET_REGEX_Automaton *new;
+ struct GNUNET_REGEX_Automaton *new_nfa;
b = ctx->stack_tail;
+ GNUNET_assert (NULL != b);
GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
a = ctx->stack_tail;
+ GNUNET_assert (NULL != a);
GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
- state_add_transition (ctx, a->end, 0, b->start);
+ state_add_transition (ctx, a->end, NULL, b->start);
a->end->accepting = 0;
b->end->accepting = 1;
- new = nfa_fragment_create (NULL, NULL);
- nfa_add_states (new, a->states_head, a->states_tail);
- nfa_add_states (new, b->states_head, b->states_tail);
- new->start = a->start;
- new->end = b->end;
+ new_nfa = nfa_fragment_create (NULL, NULL);
+ nfa_add_states (new_nfa, a->states_head, a->states_tail);
+ nfa_add_states (new_nfa, b->states_head, b->states_tail);
+ new_nfa->start = a->start;
+ new_nfa->end = b->end;
+ new_nfa->state_count += a->state_count + b->state_count;
automaton_fragment_clear (a);
automaton_fragment_clear (b);
- GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
+ GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
}
+
/**
* Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
*
@@ -1468,12 +2577,11 @@ static void
nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
- struct GNUNET_REGEX_Automaton *new;
+ struct GNUNET_REGEX_Automaton *new_nfa;
struct GNUNET_REGEX_State *start;
struct GNUNET_REGEX_State *end;
a = ctx->stack_tail;
- GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
if (NULL == a)
{
@@ -1482,24 +2590,27 @@ nfa_add_star_op (struct GNUNET_REGEX_Context *ctx)
return;
}
+ GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
+
start = nfa_state_create (ctx, 0);
end = nfa_state_create (ctx, 1);
- state_add_transition (ctx, start, 0, a->start);
- state_add_transition (ctx, start, 0, end);
- state_add_transition (ctx, a->end, 0, a->start);
- state_add_transition (ctx, a->end, 0, end);
+ state_add_transition (ctx, start, NULL, a->start);
+ state_add_transition (ctx, start, NULL, end);
+ state_add_transition (ctx, a->end, NULL, a->start);
+ state_add_transition (ctx, a->end, NULL, end);
a->end->accepting = 0;
end->accepting = 1;
- new = nfa_fragment_create (start, end);
- nfa_add_states (new, a->states_head, a->states_tail);
+ new_nfa = nfa_fragment_create (start, end);
+ nfa_add_states (new_nfa, a->states_head, a->states_tail);
automaton_fragment_clear (a);
- GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
+ GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
}
+
/**
* Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
*
@@ -1511,13 +2622,22 @@ nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx)
struct GNUNET_REGEX_Automaton *a;
a = ctx->stack_tail;
+
+ if (NULL == a)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "nfa_add_plus_op failed, because there was no element on the stack");
+ return;
+ }
+
GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
- state_add_transition (ctx, a->end, 0, a->start);
+ state_add_transition (ctx, a->end, NULL, a->start);
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
}
+
/**
* Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
*
@@ -1527,12 +2647,11 @@ static void
nfa_add_question_op (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
- struct GNUNET_REGEX_Automaton *new;
+ struct GNUNET_REGEX_Automaton *new_nfa;
struct GNUNET_REGEX_State *start;
struct GNUNET_REGEX_State *end;
a = ctx->stack_tail;
- GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
if (NULL == a)
{
@@ -1541,22 +2660,24 @@ nfa_add_question_op (struct GNUNET_REGEX_Context *ctx)
return;
}
+ GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
+
start = nfa_state_create (ctx, 0);
end = nfa_state_create (ctx, 1);
- state_add_transition (ctx, start, 0, a->start);
- state_add_transition (ctx, start, 0, end);
- state_add_transition (ctx, a->end, 0, end);
+ state_add_transition (ctx, start, NULL, a->start);
+ state_add_transition (ctx, start, NULL, end);
+ state_add_transition (ctx, a->end, NULL, end);
a->end->accepting = 0;
- new = nfa_fragment_create (start, end);
- nfa_add_states (new, a->states_head, a->states_tail);
+ new_nfa = nfa_fragment_create (start, end);
+ nfa_add_states (new_nfa, a->states_head, a->states_tail);
+ GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
automaton_fragment_clear (a);
-
- GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
}
+
/**
* Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that
* alternates between a and b (a|b)
@@ -1568,44 +2689,47 @@ nfa_add_alternation (struct GNUNET_REGEX_Context *ctx)
{
struct GNUNET_REGEX_Automaton *a;
struct GNUNET_REGEX_Automaton *b;
- struct GNUNET_REGEX_Automaton *new;
+ struct GNUNET_REGEX_Automaton *new_nfa;
struct GNUNET_REGEX_State *start;
struct GNUNET_REGEX_State *end;
b = ctx->stack_tail;
+ GNUNET_assert (NULL != b);
GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
a = ctx->stack_tail;
+ GNUNET_assert (NULL != a);
GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
start = nfa_state_create (ctx, 0);
end = nfa_state_create (ctx, 1);
- state_add_transition (ctx, start, 0, a->start);
- state_add_transition (ctx, start, 0, b->start);
+ state_add_transition (ctx, start, NULL, a->start);
+ state_add_transition (ctx, start, NULL, b->start);
- state_add_transition (ctx, a->end, 0, end);
- state_add_transition (ctx, b->end, 0, end);
+ state_add_transition (ctx, a->end, NULL, end);
+ state_add_transition (ctx, b->end, NULL, end);
a->end->accepting = 0;
b->end->accepting = 0;
end->accepting = 1;
- new = nfa_fragment_create (start, end);
- nfa_add_states (new, a->states_head, a->states_tail);
- nfa_add_states (new, b->states_head, b->states_tail);
+ new_nfa = nfa_fragment_create (start, end);
+ nfa_add_states (new_nfa, a->states_head, a->states_tail);
+ nfa_add_states (new_nfa, b->states_head, b->states_tail);
automaton_fragment_clear (a);
automaton_fragment_clear (b);
- GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
+ GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
}
+
/**
* Adds a new nfa fragment to the stack
*
* @param ctx context
- * @param lit label for nfa transition
+ * @param label label for nfa transition
*/
static void
-nfa_add_label (struct GNUNET_REGEX_Context *ctx, const char lit)
+nfa_add_label (struct GNUNET_REGEX_Context *ctx, const char *label)
{
struct GNUNET_REGEX_Automaton *n;
struct GNUNET_REGEX_State *start;
@@ -1615,12 +2739,13 @@ nfa_add_label (struct GNUNET_REGEX_Context *ctx, const char lit)
start = nfa_state_create (ctx, 0);
end = nfa_state_create (ctx, 1);
- state_add_transition (ctx, start, lit, end);
+ state_add_transition (ctx, start, label, end);
n = nfa_fragment_create (start, end);
GNUNET_assert (NULL != n);
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
}
+
/**
* Initialize a new context
*
@@ -1636,11 +2761,11 @@ GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
}
ctx->state_id = 0;
ctx->transition_id = 0;
- ctx->scc_id = 0;
ctx->stack_head = NULL;
ctx->stack_tail = NULL;
}
+
/**
* Construct an NFA by parsing the regex string of length 'len'.
*
@@ -1655,25 +2780,36 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
struct GNUNET_REGEX_Context ctx;
struct GNUNET_REGEX_Automaton *nfa;
const char *regexp;
+ char curlabel[2];
char *error_msg;
unsigned int count;
unsigned int altcount;
unsigned int atomcount;
- unsigned int pcount;
+ unsigned int poff;
+ unsigned int psize;
struct
{
int altcount;
int atomcount;
} *p;
+ if (NULL == regex || 0 == strlen (regex) || 0 == len)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not parse regex. Empty regex string provided.\n");
+
+ return NULL;
+ }
GNUNET_REGEX_context_init (&ctx);
regexp = regex;
+ curlabel[1] = '\0';
p = NULL;
error_msg = NULL;
altcount = 0;
atomcount = 0;
- pcount = 0;
+ poff = 0;
+ psize = 0;
for (count = 0; count < len && *regexp; count++, regexp++)
{
@@ -1685,9 +2821,11 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
--atomcount;
nfa_add_concatenation (&ctx);
}
- GNUNET_array_grow (p, pcount, pcount + 1);
- p[pcount - 1].altcount = altcount;
- p[pcount - 1].atomcount = atomcount;
+ if (poff == psize)
+ GNUNET_array_grow (p, psize, psize * 2 + 4);
+ p[poff].altcount = altcount;
+ p[poff].atomcount = atomcount;
+ poff++;
altcount = 0;
atomcount = 0;
break;
@@ -1702,26 +2840,26 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
altcount++;
break;
case ')':
- if (0 == pcount)
+ if (0 == poff)
{
error_msg = "Missing opening '('";
goto error;
}
if (0 == atomcount)
{
- // Ignore this: "()"
- pcount--;
- altcount = p[pcount].altcount;
- atomcount = p[pcount].atomcount;
+ /* Ignore this: "()" */
+ poff--;
+ altcount = p[poff].altcount;
+ atomcount = p[poff].atomcount;
break;
}
while (--atomcount > 0)
nfa_add_concatenation (&ctx);
for (; altcount > 0; altcount--)
nfa_add_alternation (&ctx);
- pcount--;
- altcount = p[pcount].altcount;
- atomcount = p[pcount].atomcount;
+ poff--;
+ altcount = p[poff].altcount;
+ atomcount = p[poff].atomcount;
atomcount++;
break;
case '*':
@@ -1748,21 +2886,19 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
}
nfa_add_question_op (&ctx);
break;
- case 92: /* escape: \ */
- regexp++;
- count++;
default:
if (atomcount > 1)
{
--atomcount;
nfa_add_concatenation (&ctx);
}
- nfa_add_label (&ctx, *regexp);
+ curlabel[0] = *regexp;
+ nfa_add_label (&ctx, curlabel);
atomcount++;
break;
}
}
- if (0 != pcount)
+ if (0 != poff)
{
error_msg = "Unbalanced parenthesis";
goto error;
@@ -1772,8 +2908,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
for (; altcount > 0; altcount--)
nfa_add_alternation (&ctx);
- if (NULL != p)
- GNUNET_free (p);
+ GNUNET_array_grow (p, psize, 0);
nfa = ctx.stack_tail;
GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
@@ -1784,23 +2919,34 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
goto error;
}
+ /* Remember the regex that was used to generate this NFA */
+ nfa->regex = GNUNET_strdup (regex);
+
+ /* create depth-first numbering of the states for pretty printing */
+ GNUNET_REGEX_automaton_traverse (nfa, NULL, NULL, NULL, &number_states, NULL);
+
+ /* No multistriding added so far */
+ nfa->is_multistrided = GNUNET_NO;
+
return nfa;
error:
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
if (NULL != error_msg)
GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
- if (NULL != p)
- GNUNET_free (p);
- while (NULL != ctx.stack_tail)
+
+ GNUNET_free_non_null (p);
+
+ while (NULL != (nfa = ctx.stack_head))
{
- GNUNET_REGEX_automaton_destroy (ctx.stack_tail);
- GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail,
- ctx.stack_tail);
+ GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
+ GNUNET_REGEX_automaton_destroy (nfa);
}
+
return NULL;
}
+
/**
* Create DFA states based on given 'nfa' and starting with 'dfa_state'.
*
@@ -1816,32 +2962,35 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
struct GNUNET_REGEX_Automaton *dfa,
struct GNUNET_REGEX_State *dfa_state)
{
- struct Transition *ctran;
- struct GNUNET_REGEX_State *state_iter;
+ struct GNUNET_REGEX_Transition *ctran;
struct GNUNET_REGEX_State *new_dfa_state;
struct GNUNET_REGEX_State *state_contains;
- struct GNUNET_REGEX_StateSet *tmp;
- struct GNUNET_REGEX_StateSet *nfa_set;
+ struct GNUNET_REGEX_State *state_iter;
+ struct GNUNET_REGEX_StateSet tmp;
+ struct GNUNET_REGEX_StateSet nfa_set;
for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
{
- if (0 == ctran->label || NULL != ctran->to_state)
+ if (NULL == ctran->label || NULL != ctran->to_state)
continue;
- tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->label);
- nfa_set = nfa_closure_set_create (nfa, tmp, 0);
- state_set_clear (tmp);
- new_dfa_state = dfa_state_create (ctx, nfa_set);
+ nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
+ nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
+ state_set_clear (&tmp);
+
state_contains = NULL;
for (state_iter = dfa->states_head; NULL != state_iter;
state_iter = state_iter->next)
{
- if (0 == state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set))
+ if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
+ {
state_contains = state_iter;
+ break;
+ }
}
-
if (NULL == state_contains)
{
+ new_dfa_state = dfa_state_create (ctx, &nfa_set);
automaton_add_state (dfa, new_dfa_state);
ctran->to_state = new_dfa_state;
construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
@@ -1849,30 +2998,43 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
else
{
ctran->to_state = state_contains;
- automaton_destroy_state (new_dfa_state);
+ state_set_clear (&nfa_set);
}
}
}
+
/**
- * Construct DFA for the given 'regex' of length 'len'
- *
- * @param regex regular expression string
- * @param len length of the regular expression
- *
- * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton
+ * Construct DFA for the given 'regex' of length 'len'.
+ *
+ * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
+ * compressed to o -> abc -> o. Note that this parameter influences the
+ * non-determinism of states of the resulting NFA in the DHT (number of outgoing
+ * edges with the same label). For example for an application that stores IPv4
+ * addresses as bitstrings it could make sense to limit the path compression to
+ * 4 or 8.
+ *
+ * @param regex regular expression string.
+ * @param len length of the regular expression.
+ * @param max_path_len limit the path compression length to the
+ * given value. If set to 1, no path compression is applied. Set to 0 for
+ * maximal possible path compression (generally not desirable).
+ * @return DFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
*/
struct GNUNET_REGEX_Automaton *
-GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
+GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
+ unsigned int max_path_len)
{
struct GNUNET_REGEX_Context ctx;
struct GNUNET_REGEX_Automaton *dfa;
struct GNUNET_REGEX_Automaton *nfa;
- struct GNUNET_REGEX_StateSet *nfa_set;
+ struct GNUNET_REGEX_StateSet nfa_start_eps_cls;
+ struct GNUNET_REGEX_StateSet singleton_set;
GNUNET_REGEX_context_init (&ctx);
- // Create NFA
+ /* Create NFA */
+ // fprintf (stderr, "N");
nfa = GNUNET_REGEX_construct_nfa (regex, len);
if (NULL == nfa)
@@ -1884,28 +3046,43 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
dfa->type = DFA;
-
- // Create DFA start state from epsilon closure
- nfa_set = nfa_closure_create (nfa, nfa->start, 0);
- dfa->start = dfa_state_create (&ctx, nfa_set);
+ dfa->regex = GNUNET_strdup (regex);
+
+ /* Create DFA start state from epsilon closure */
+ memset (&singleton_set, 0, sizeof (struct GNUNET_REGEX_StateSet));
+ state_set_append (&singleton_set, nfa->start);
+ nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
+ state_set_clear (&singleton_set);
+ dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
automaton_add_state (dfa, dfa->start);
+ // fprintf (stderr, "D");
construct_dfa_states (&ctx, nfa, dfa, dfa->start);
-
GNUNET_REGEX_automaton_destroy (nfa);
- // Minimize DFA
- dfa_minimize (&ctx, dfa);
+ /* Minimize DFA */
+ // fprintf (stderr, "M");
+ if (GNUNET_OK != dfa_minimize (&ctx, dfa))
+ {
+ GNUNET_REGEX_automaton_destroy (dfa);
+ return NULL;
+ }
- // Calculate SCCs
- scc_tarjan (&ctx, dfa);
+ /* Create proofs and hashes for all states */
+ if (GNUNET_OK != automaton_create_proofs (dfa))
+ {
+ GNUNET_REGEX_automaton_destroy (dfa);
+ return NULL;
+ }
- // Create proofs for all states
- automaton_create_proofs (dfa);
+ /* Compress linear DFA paths */
+ if (1 != max_path_len)
+ dfa_compress_paths (&ctx, dfa, max_path_len);
return dfa;
}
+
/**
* Free the memory allocated by constructing the GNUNET_REGEX_Automaton data
* structure.
@@ -1921,123 +3098,19 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
if (NULL == a)
return;
- for (s = a->states_head; NULL != s;)
+ GNUNET_free_non_null (a->regex);
+ GNUNET_free_non_null (a->canonical_regex);
+
+ for (s = a->states_head; NULL != s; s = next_state)
{
next_state = s->next;
+ GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
automaton_destroy_state (s);
- s = next_state;
}
GNUNET_free (a);
}
-/**
- * Save the given automaton as a GraphViz dot file
- *
- * @param a the automaton to be saved
- * @param filename where to save the file
- */
-void
-GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
- const char *filename)
-{
- struct GNUNET_REGEX_State *s;
- struct Transition *ctran;
- char *s_acc = NULL;
- char *s_tran = NULL;
- char *start;
- char *end;
- FILE *p;
-
- if (NULL == a)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
- return;
- }
-
- if (NULL == filename || strlen (filename) < 1)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
- return;
- }
-
- p = fopen (filename, "w");
-
- if (NULL == p)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
- filename);
- return;
- }
-
- start = "digraph G {\nrankdir=LR\n";
- fwrite (start, strlen (start), 1, p);
-
- for (s = a->states_head; NULL != s; s = s->next)
- {
- if (s->accepting)
- {
- GNUNET_asprintf (&s_acc,
- "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
- s->name, s->scc_id);
- }
- else
- {
- GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
- s->scc_id);
- }
-
- if (NULL == s_acc)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
- s->name);
- return;
- }
- fwrite (s_acc, strlen (s_acc), 1, p);
- GNUNET_free (s_acc);
- s_acc = NULL;
-
- for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
- {
- if (NULL == ctran->to_state)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Transition from State %i has has no state for transitioning\n",
- s->id);
- continue;
- }
-
- if (ctran->label == 0)
- {
- GNUNET_asprintf (&s_tran,
- "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n",
- s->name, ctran->to_state->name, s->scc_id);
- }
- else
- {
- GNUNET_asprintf (&s_tran,
- "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n",
- s->name, ctran->to_state->name, ctran->label,
- s->scc_id);
- }
-
- if (NULL == s_tran)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
- s->name);
- return;
- }
-
- fwrite (s_tran, strlen (s_tran), 1, p);
- GNUNET_free (s_tran);
- s_tran = NULL;
- }
- }
-
- end = "\n}\n";
- fwrite (end, strlen (end), 1, p);
- fclose (p);
-}
/**
* Evaluates the given string using the given DFA automaton
@@ -2052,6 +3125,7 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
{
const char *strp;
struct GNUNET_REGEX_State *s;
+ unsigned int step_len;
if (DFA != a->type)
{
@@ -2062,9 +3136,14 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
s = a->start;
- for (strp = string; NULL != strp && *strp; strp++)
+ /* If the string is empty but the starting state is accepting, we accept. */
+ if ((NULL == string || 0 == strlen (string)) && s->accepting)
+ return 0;
+
+ for (strp = string; NULL != strp && *strp; strp += step_len)
{
- s = dfa_move (s, *strp);
+ step_len = dfa_move (&s, strp);
+
if (NULL == s)
break;
}
@@ -2075,6 +3154,7 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
return 1;
}
+
/**
* Evaluates the given string using the given NFA automaton
*
@@ -2087,10 +3167,12 @@ static int
evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
{
const char *strp;
+ char str[2];
struct GNUNET_REGEX_State *s;
- struct GNUNET_REGEX_StateSet *sset;
- struct GNUNET_REGEX_StateSet *new_sset;
- int i;
+ struct GNUNET_REGEX_StateSet sset;
+ struct GNUNET_REGEX_StateSet new_sset;
+ struct GNUNET_REGEX_StateSet singleton_set;
+ unsigned int i;
int result;
if (NFA != a->type)
@@ -2100,32 +3182,41 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
return -1;
}
+ /* If the string is empty but the starting state is accepting, we accept. */
+ if ((NULL == string || 0 == strlen (string)) && a->start->accepting)
+ return 0;
+
result = 1;
- strp = string;
- sset = nfa_closure_create (a, a->start, 0);
+ memset (&singleton_set, 0, sizeof (struct GNUNET_REGEX_StateSet));
+ state_set_append (&singleton_set, a->start);
+ nfa_closure_set_create (&sset, a, &singleton_set, NULL);
+ state_set_clear (&singleton_set);
+ str[1] = '\0';
for (strp = string; NULL != strp && *strp; strp++)
{
- new_sset = nfa_closure_set_create (a, sset, *strp);
- state_set_clear (sset);
- sset = nfa_closure_set_create (a, new_sset, 0);
- state_set_clear (new_sset);
+ str[0] = *strp;
+ nfa_closure_set_create (&new_sset, a, &sset, str);
+ state_set_clear (&sset);
+ nfa_closure_set_create (&sset, a, &new_sset, 0);
+ state_set_clear (&new_sset);
}
- for (i = 0; i < sset->len; i++)
+ for (i = 0; i < sset.off; i++)
{
- s = sset->states[i];
- if (NULL != s && s->accepting)
+ s = sset.states[i];
+ if ( (NULL != s) && (s->accepting) )
{
result = 0;
break;
}
}
- state_set_clear (sset);
+ state_set_clear (&sset);
return result;
}
+
/**
* Evaluates the given 'string' against the given compiled regex
*
@@ -2157,9 +3248,55 @@ GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
return result;
}
+
+/**
+ * Get the canonical regex of the given automaton.
+ * When constructing the automaton a proof is computed for each state,
+ * consisting of the regular expression leading to this state. A complete
+ * regex for the automaton can be computed by combining these proofs.
+ * As of now this function is only useful for testing.
+ *
+ * @param a automaton for which the canonical regex should be returned.
+ *
+ * @return the 'canonical_regex' pointer stored in 'a' (the automaton's own
+ * string, not a copy; GNUNET_REGEX_automaton_destroy releases it, so
+ * the caller must not free it), or NULL if 'a' is NULL.
+ */
+const char *
+GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a)
+{
+ if (NULL == a)
+ return NULL;
+
+ return a->canonical_regex;
+}
+
+
+/**
+ * Get the number of transitions that are contained in the given automaton.
+ *
+ * The result is the sum of the 'transition_count' members of all states in
+ * the automaton's state list; the transitions themselves are not walked.
+ *
+ * @param a automaton for which the number of transitions should be returned.
+ * May be NULL.
+ *
+ * @return number of transitions in the given automaton, 0 if 'a' is NULL.
+ */
+unsigned int
+GNUNET_REGEX_get_transition_count (struct GNUNET_REGEX_Automaton *a)
+{
+ unsigned int t_count;
+ struct GNUNET_REGEX_State *s;
+
+ if (NULL == a)
+ return 0;
+
+ t_count = 0;
+ for (s = a->states_head; NULL != s; s = s->next)
+ t_count += s->transition_count; /* per-state counter, summed up */
+
+ return t_count;
+}
+
+
/**
* Get the first key for the given 'input_string'. This hashes the first x bits
- * of the 'input_strings'.
+ * of the 'input_string'.
*
* @param input_string string.
* @param string_len length of the 'input_string'.
@@ -2168,13 +3305,15 @@ GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
* @return number of bits of 'input_string' that have been consumed
* to construct the key
*/
-unsigned int
-GNUNET_REGEX_get_first_key (const char *input_string, unsigned int string_len,
- GNUNET_HashCode * key)
+size_t
+GNUNET_REGEX_get_first_key (const char *input_string, size_t string_len,
+ struct GNUNET_HashCode * key)
{
unsigned int size;
- size = string_len < initial_bits ? string_len : initial_bits;
+ size =
+ string_len <
+ GNUNET_REGEX_INITIAL_BYTES ? string_len : GNUNET_REGEX_INITIAL_BYTES;
if (NULL == input_string)
{
@@ -2187,49 +3326,125 @@ GNUNET_REGEX_get_first_key (const char *input_string, unsigned int string_len,
return size;
}
+
/**
* Check if the given 'proof' matches the given 'key'.
*
- * @param proof partial regex
- * @param key hash
+ * @param proof partial regex of a state.
+ * @param key hash of a state.
*
- * @return GNUNET_OK if the proof is valid for the given key
+ * @return GNUNET_OK if the proof is valid for the given key.
*/
int
-GNUNET_REGEX_check_proof (const char *proof, const GNUNET_HashCode * key)
+GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key)
{
- return GNUNET_OK;
+ struct GNUNET_HashCode key_check;
+
+ if (NULL == proof || NULL == key)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Proof check failed, was NULL.\n");
+ return GNUNET_NO;
+ }
+
+ GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check);
+ return (0 ==
+ GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO;
}
+
/**
- * Iterate over all edges helper function starting from state 's', calling
- * iterator on for each edge.
+ * Recursive function that calls the iterator for each synthetic start state.
*
- * @param s state.
+ * @param min_len minimum length of the path in the graph.
+ * @param max_len maximum length of the path in the graph.
+ * @param consumed_string string consumed by traversing the graph till this state.
+ * @param state current state of the automaton.
* @param iterator iterator function called for each edge.
- * @param iterator_cls closure.
+ * @param iterator_cls closure for the iterator function.
*/
static void
-iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator,
- void *iterator_cls)
+iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
+ char *consumed_string, struct GNUNET_REGEX_State *state,
+ GNUNET_REGEX_KeyIterator iterator, void *iterator_cls)
{
- struct Transition *t;
- struct GNUNET_REGEX_Edge edges[s->transition_count];
- unsigned int num_edges;
+ unsigned int i;
+ char *temp;
+ struct GNUNET_REGEX_Transition *t;
+ unsigned int num_edges = state->transition_count;
+ struct GNUNET_REGEX_Edge edges[num_edges];
+ struct GNUNET_REGEX_Edge edge[1];
+ struct GNUNET_HashCode hash;
+ struct GNUNET_HashCode hash_new;
+
+ unsigned int cur_len;
+
+ if (NULL != consumed_string)
+ cur_len = strlen (consumed_string);
+ else
+ cur_len = 0;
- if (GNUNET_YES != s->marked)
+ if ((cur_len >= min_len || GNUNET_YES == state->accepting) && cur_len > 0 &&
+ NULL != consumed_string)
{
- s->marked = GNUNET_YES;
+ if (cur_len <= max_len)
+ {
+ if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
+ {
+ for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
+ t = t->next, i++)
+ {
+ edges[i].label = t->label;
+ edges[i].destination = t->to_state->hash;
+ }
+ GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
+ iterator (iterator_cls, &hash, consumed_string, state->accepting,
+ num_edges, edges);
+ }
- num_edges = state_get_edges (s, edges);
+ if (GNUNET_YES == state->accepting && cur_len > 1 &&
+ state->transition_count < 1 && cur_len < max_len)
+ {
+ /* Special case for regex consisting of just a string that is shorter than
+ * max_len */
+ edge[0].label = &consumed_string[cur_len - 1];
+ edge[0].destination = state->hash;
+ temp = GNUNET_strdup (consumed_string);
+ temp[cur_len - 1] = '\0';
+ GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
+ iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
+ GNUNET_free (temp);
+ }
+ }
+ else if (max_len < cur_len)
+ {
+ /* Case where the concatenated labels are longer than max_len, then split. */
+ edge[0].label = &consumed_string[max_len];
+ edge[0].destination = state->hash;
+ temp = GNUNET_strdup (consumed_string);
+ temp[max_len] = '\0';
+ GNUNET_CRYPTO_hash (temp, max_len, &hash);
+ iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
+ GNUNET_free (temp);
+ }
+ }
- iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges, edges);
+ if (cur_len < max_len)
+ {
+ for (t = state->transitions_head; NULL != t; t = t->next)
+ {
+ if (NULL != consumed_string)
+ GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
+ else
+ GNUNET_asprintf (&temp, "%s", t->label);
- for (t = s->transitions_head; NULL != t; t = t->next)
- iterate_edge (t->to_state, iterator, iterator_cls);
+ iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator,
+ iterator_cls);
+ GNUNET_free (temp);
+ }
}
}
+
/**
* Iterate over all edges starting from start state of automaton 'a'. Calling
* iterator for each edge.
@@ -2246,7 +3461,133 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
struct GNUNET_REGEX_State *s;
for (s = a->states_head; NULL != s; s = s->next)
+ {
+ struct GNUNET_REGEX_Edge edges[s->transition_count];
+ unsigned int num_edges;
+
+ num_edges = state_get_edges (s, edges);
+
+ if ((NULL != s->proof && 0 < strlen (s->proof)) || s->accepting)
+ iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges,
+ edges);
+
s->marked = GNUNET_NO;
+ }
+
+ iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, GNUNET_REGEX_INITIAL_BYTES,
+ NULL, a->start, iterator, iterator_cls);
+}
+
+/**
+ * Create a string with binary IP notation for the given 'addr' in 'str'.
+ *
+ * The string consists of the characters '0' and '1' only (32 of them for
+ * AF_INET, 128 for AF_INET6) followed by a 0-terminator. It is filled from
+ * its last character towards its first one, taking the lowest bit of the
+ * remaining value each time, so the first character is the highest bit of
+ * the first address byte.
+ *
+ * For any other address family nothing is written to 'str' (the switch has
+ * no default branch).
+ *
+ * @param af address family of the given 'addr'.
+ * @param addr address that should be converted to a string.
+ * struct in_addr * for IPv4 and struct in6_addr * for IPv6.
+ * @param str string that will contain binary notation of 'addr'. Expected
+ * to be at least 33 bytes long for IPv4 and 129 bytes long for IPv6.
+ */
+static void
+iptobinstr (const int af, const void *addr, char *str)
+{
+ int i;
+
+ switch (af)
+ {
+ case AF_INET:
+ {
+ uint32_t b = htonl (((struct in_addr *) addr)->s_addr); /* byte-swapped copy of the address, consumed bit by bit below */
+
+ str[32] = '\0';
+ str += 31; /* start at the last digit */
+ for (i = 31; i >= 0; i--)
+ {
+ *str = (b & 1) + '0'; /* lowest remaining bit as '0' or '1' */
+ str--;
+ b >>= 1;
+ }
+ break;
+ }
+ case AF_INET6:
+ {
+ struct in6_addr b = *(const struct in6_addr *) addr; /* local copy, the shifts below destroy it */
+
+ str[128] = '\0';
+ str += 127; /* start at the last digit */
+ for (i = 127; i >= 0; i--)
+ {
+ *str = (b.s6_addr[i / 8] & 1) + '0'; /* i / 8 selects the address byte this digit belongs to */
+ str--;
+ b.s6_addr[i / 8] >>= 1;
+ }
+ break;
+ }
+ }
+}
+
- iterate_edge (a->start, iterator, iterator_cls);
+/**
+ * Get the ipv4 network prefix from the given 'netmask'.
+ *
+ * The netmask is parsed with inet_pton (AF_INET). The length starts at 32 and
+ * is decremented once for every right shift needed to turn the inverted,
+ * byte-swapped mask into 0, i.e. once for every bit position up to and
+ * including the highest bit that is not set in the mask.
+ *
+ * @param netmask netmask for which to get the prefix len, as text in the
+ * form accepted by inet_pton (AF_INET).
+ *
+ * @return length of ipv4 prefix for 'netmask', 0 if 'netmask' could not be
+ * parsed.
+ */
+static unsigned int
+ipv4netmasktoprefixlen (const char *netmask)
+{
+ struct in_addr a;
+ unsigned int len;
+ uint32_t t;
+
+ if (1 != inet_pton (AF_INET, netmask, &a))
+ return 0; /* not a valid IPv4 address string */
+ len = 32;
+ for (t = htonl (~a.s_addr); 0 != t; t >>= 1) /* t holds the host part bits of the mask */
+ len--;
+ return len;
}
+
+
+/**
+ * Create a regex in 'rxstr' from the given 'ip' and 'netmask'.
+ *
+ * The address is first written as 32 '0'/'1' characters, then cut off after
+ * the prefix length derived from 'netmask'. If the prefix is shorter than 32
+ * bits, "(0|1)+" is appended so that the remaining bits may take any value.
+ * A 'netmask' that cannot be parsed gives a prefix length of 0, in which case
+ * the result is just "(0|1)+".
+ *
+ * @param ip IPv4 representation.
+ * @param netmask netmask for the ip.
+ * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV4_REGEXLEN
+ * bytes long.
+ */
+void
+GNUNET_REGEX_ipv4toregex (const struct in_addr *ip, const char *netmask,
+ char *rxstr)
+{
+ unsigned int pfxlen;
+
+ pfxlen = ipv4netmasktoprefixlen (netmask);
+ iptobinstr (AF_INET, ip, rxstr);
+ rxstr[pfxlen] = '\0'; /* keep only the network part of the bit string */
+ if (pfxlen < 32)
+ strcat (rxstr, "(0|1)+");
+}
+
+
+/**
+ * Create a regex in 'rxstr' from the given 'ipv6' and 'prefixlen'.
+ *
+ * The address is first written as 128 '0'/'1' characters, then cut off after
+ * 'prefixlen' characters. If the prefix is shorter than 128 bits, "(0|1)+" is
+ * appended so that the remaining bits may take any value.
+ *
+ * @param ipv6 IPv6 representation.
+ * @param prefixlen length of the ipv6 prefix. Values larger than 128 are
+ * treated as 128.
+ * @param rxstr generated regex, must be at least GNUNET_REGEX_IPV6_REGEXLEN
+ * bytes long.
+ */
+void
+GNUNET_REGEX_ipv6toregex (const struct in6_addr *ipv6, unsigned int prefixlen,
+ char *rxstr)
+{
+ /* Only 128 digits plus the terminator are written by iptobinstr (); a
+ * larger prefix length would place the terminator beyond that string and
+ * possibly beyond the caller's buffer. */
+ if (prefixlen > 128)
+ prefixlen = 128;
+ iptobinstr (AF_INET6, ipv6, rxstr);
+ rxstr[prefixlen] = '\0'; /* keep only the network part of the bit string */
+ if (prefixlen < 128)
+ strcat (rxstr, "(0|1)+");
+}
+
+
+/* end of regex.c */
diff --git a/src/regex/regex_block_lib.c b/src/regex/regex_block_lib.c
new file mode 100644
index 0000000..95361ca
--- /dev/null
+++ b/src/regex/regex_block_lib.c
@@ -0,0 +1,210 @@
+/*
+ This file is part of GNUnet.
+ (C) 2012,2013 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @author Bartlomiej Polot
+ * @file regex/regex_block_lib.c
+ */
+#include "platform.h"
+#include "regex_block_lib.h"
+
+
+/**
+ * Struct to keep track of the xquery while iterating all the edges in a block.
+ * It is handed to check_edge as closure by GNUNET_REGEX_block_check.
+ */
+struct regex_block_xquery_ctx
+{
+ /**
+ * Xquery: string we are looking for. An edge matches if its token is equal
+ * to the first bytes of this string.
+ */
+ const char *xquery;
+
+ /**
+ * Has any edge matched the xquery so far? (GNUNET_OK / GNUNET_NO)
+ * Starts out as GNUNET_NO and is set to GNUNET_OK by check_edge on the
+ * first match; it is never reset during an iteration.
+ */
+ int found;
+};
+
+
+/**
+ * Iterator over all edges in a block, checking for a presence of a given query.
+ *
+ * An edge matches if its token is not longer than the xquery and equal to
+ * the first 'len' bytes of the xquery. On a match 'found' in the xquery
+ * context is set to GNUNET_OK; it is never set back.
+ *
+ * @param cls Closure, (xquery context).
+ * @param token Token that follows to next state.
+ * @param len Length of token.
+ * @param key Hash of next state.
+ *
+ * @return GNUNET_YES, to keep iterating
+ */
+static int
+check_edge (void *cls,
+ const char *token,
+ size_t len,
+ const struct GNUNET_HashCode *key)
+{
+ struct regex_block_xquery_ctx *ctx = cls;
+
+ /* 'len' is a size_t; convert it explicitly to the types that the '%.*s'
+ * and '%u' conversions expect. */
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " edge %.*s [%u]\n",
+ (int) len, token, (unsigned int) len);
+ if (strlen (ctx->xquery) < len)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " too long!\n");
+ return GNUNET_YES;
+ }
+ if (0 == strncmp (ctx->xquery, token, len))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " OK!\n");
+ ctx->found = GNUNET_OK;
+ }
+ else
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " KO!\n");
+ }
+
+ return GNUNET_YES; /* keep checking for malformed data! */
+}
+
+
+/**
+ * Check if the regex block is well formed, including all edges
+ *
+ * A block that is marked as accepting is reported as fine for an empty
+ * xquery without looking at its edges. Otherwise all edges are iterated and
+ * the block is fine if the token of at least one edge is equal to the
+ * beginning of the xquery.
+ *
+ * @param block The start of the block.
+ * @param size The size of the block.
+ * @param xquery String describing the edge we are looking for.
+ *
+ * @return GNUNET_OK in case it's fine.
+ * GNUNET_NO in case the xquery is not found.
+ * GNUNET_SYSERR if the block is invalid.
+ */
+int
+GNUNET_REGEX_block_check (const struct RegexBlock *block,
+ size_t size,
+ const char *xquery)
+{
+ int res;
+ struct regex_block_xquery_ctx ctx;
+
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "* Checking block with xquery \"%s\"\n",
+ xquery);
+ /* The header has to be complete before 'accepting' may be read from it. */
+ if (sizeof (struct RegexBlock) > size)
+ {
+ GNUNET_break_op (0);
+ return GNUNET_SYSERR;
+ }
+ if ( (GNUNET_YES == ntohl(block->accepting)) && ('\0' == xquery[0]) )
+ return GNUNET_OK;
+ ctx.xquery = xquery;
+ ctx.found = GNUNET_NO;
+ res = GNUNET_REGEX_block_iterate (block, size, &check_edge, &ctx);
+ if (GNUNET_SYSERR == res)
+ return GNUNET_SYSERR;
+ return ctx.found;
+}
+
+
+/**
+ * Iterate over all edges of a block of a regex state.
+ *
+ * The layout that is walked is: struct RegexBlock, 'n_proof' bytes of proof,
+ * then 'n_edges' times a struct RegexEdge directly followed by 'n_token'
+ * bytes of token. The counters are converted with ntohl and every part is
+ * checked against 'size' before it is accessed.
+ *
+ * @param block Block to iterate over.
+ * @param size Size of block. For SIZE_MAX the final check that the block
+ * has been consumed exactly is skipped.
+ * @param iterator Function to call on each edge in the block, may be NULL.
+ * The iteration ends as soon as it returns GNUNET_NO.
+ * @param iter_cls Closure for the iterator.
+ *
+ * @return GNUNET_SYSERR if the block is malformed, GNUNET_OK otherwise (also
+ * if the iterator ended the iteration early)
+ */
+int
+GNUNET_REGEX_block_iterate (const struct RegexBlock *block,
+ size_t size,
+ GNUNET_REGEX_EgdeIterator iterator,
+ void *iter_cls)
+{
+ const struct RegexEdge *edge;
+ unsigned int n;
+ unsigned int n_token;
+ unsigned int i;
+ size_t offset;
+ const char *aux;
+
+ /* 'offset <= size' holds after every successful check below, so all
+ * following checks compare against 'size - offset'; that way an oversized
+ * length field cannot make 'offset' wrap around. */
+ offset = sizeof (struct RegexBlock);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "* Start iterating block of size %u, off %u\n",
+ (unsigned int) size, (unsigned int) offset);
+ if (offset > size) // Is it safe to access the regex block?
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "* Block is smaller than struct RegexBlock, END\n");
+ GNUNET_break_op (0);
+ return GNUNET_SYSERR;
+ }
+ n = ntohl (block->n_proof);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "* Proof length: %u, off %u\n", n, (unsigned int) (offset + n));
+ if (n > size - offset) // Is it safe to access the regex proof?
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "* Block is smaller than Block + proof, END\n");
+ GNUNET_break_op (0);
+ return GNUNET_SYSERR;
+ }
+ offset += n;
+ aux = (const char *) &block[1]; // Skip regex block
+ aux = &aux[n]; // Skip regex proof
+ n = ntohl (block->n_edges);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Edges: %u\n", n);
+ for (i = 0; i < n; i++) // aux always points at the end of the previous block
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "* Edge %u, off %u\n", i,
+ (unsigned int) (offset + sizeof (struct RegexEdge)));
+ if (sizeof (struct RegexEdge) > size - offset) // Is it safe to access the next edge block?
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "* Size not enough for RegexEdge, END\n");
+ GNUNET_break_op (0);
+ return GNUNET_SYSERR;
+ }
+ offset += sizeof (struct RegexEdge);
+ edge = (const struct RegexEdge *) aux;
+ n_token = ntohl (edge->n_token);
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "* Token lenght %u, off %u\n", n_token,
+ (unsigned int) (offset + n_token));
+ if (n_token > size - offset) // Is it safe to access the edge token?
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "* Size not enough for edge token, END\n");
+ GNUNET_break_op (0);
+ return GNUNET_SYSERR;
+ }
+ offset += n_token;
+ aux = (const char *) &edge[1]; // Skip edge block
+ if (NULL != iterator)
+ if (GNUNET_NO == iterator (iter_cls, aux, n_token, &edge->key))
+ return GNUNET_OK;
+ aux = &aux[n_token]; // Skip edge token
+ }
+ // The total size should be exactly the size of (regex + all edges) blocks
+ // If size == -1, block is from cache and therefore previously checked and
+ // assumed correct.
+ if (offset == size || SIZE_MAX == size)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+ "* Block processed, END OK\n");
+ return GNUNET_OK;
+ }
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "* Size %u (%d), read %u END KO\n",
+ (unsigned int) size, (int) size, (unsigned int) offset);
+ GNUNET_break_op (0);
+ return GNUNET_SYSERR;
+}
+
+/* end of regex_block_lib.c */
diff --git a/src/regex/regex_block_lib.h b/src/regex/regex_block_lib.h
new file mode 100644
index 0000000..f591f5f
--- /dev/null
+++ b/src/regex/regex_block_lib.h
@@ -0,0 +1,98 @@
+/*
+ This file is part of GNUnet.
+ (C) 2012,2013 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+
+/**
+ * @author Bartlomiej Polot
+ * @file regex/regex_block_lib.h
+ */
+
+#ifndef REGEX_BLOCK_LIB_H_
+#define REGEX_BLOCK_LIB_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#if 0
+ /* keep Emacsens' auto-indent happy */
+}
+#endif
+#endif
+
+#include "platform.h"
+#include "block_regex.h"
+
+/**
+ * Check if the regex block is well formed, including all edges
+ *
+ * @param block The start of the block.
+ * @param size The size of the block.
+ * @param xquery String describing the edge we are looking for.
+ *
+ * @return GNUNET_OK in case it's fine.
+ * GNUNET_NO in case the xquery is not found.
+ * GNUNET_SYSERR if the block is invalid.
+ */
+int
+GNUNET_REGEX_block_check (const struct RegexBlock *block,
+ size_t size,
+ const char *xquery);
+
+/**
+ * Iterator over edges in a block.
+ *
+ * GNUNET_REGEX_block_iterate calls it once per edge, in the order in which
+ * the edges are stored in the block.
+ *
+ * @param cls Closure.
+ * @param token Token that follows to next state. When called from
+ * GNUNET_REGEX_block_iterate this points into the block itself
+ * and is NOT 0-terminated, only 'len' bytes may be read.
+ * @param len Length of token.
+ * @param key Hash of next state.
+ *
+ * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise.
+ */
+typedef int (*GNUNET_REGEX_EgdeIterator)(void *cls,
+ const char *token,
+ size_t len,
+ const struct GNUNET_HashCode *key);
+
+
+/**
+ * Iterate over all edges of a block of a regex state.
+ *
+ * @param block Block to iterate over.
+ * @param size Size of block.
+ * @param iterator Function to call on each edge in the block.
+ * @param iter_cls Closure for the iterator.
+ *
+ * @return GNUNET_SYSERR if an error has been encountered, GNUNET_OK otherwise
+ */
+int
+GNUNET_REGEX_block_iterate (const struct RegexBlock *block,
+ size_t size,
+ GNUNET_REGEX_EgdeIterator iterator,
+ void *iter_cls);
+
+#if 0 /* keep Emacsens' auto-indent happy */
+{
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+/* ifndef REGEX_BLOCK_LIB_H */
+#endif
+/* end of regex_block_lib.h */
diff --git a/src/regex/regex_dht.c b/src/regex/regex_dht.c
new file mode 100644
index 0000000..52e8d48
--- /dev/null
+++ b/src/regex/regex_dht.c
@@ -0,0 +1,790 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_dht.c
+ * @brief library to announce regexes in the network and match strings
+ * against published regexes.
+ * @author Bartlomiej Polot
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "regex_block_lib.h"
+#include "gnunet_dht_service.h"
+#include "gnunet_statistics_service.h"
+
+/**
+ * Log a message under the "regex-dht" component name.
+ */
+#define LOG(kind,...) GNUNET_log_from (kind,"regex-dht",__VA_ARGS__)
+
+/**
+ * Replication level used when putting regex state blocks into the DHT and
+ * when starting the DHT GETs of a search.
+ */
+#define DHT_REPLICATION 5
+/**
+ * Time span used for the expiration of regex state blocks put into the DHT
+ * (also passed as the second time argument of that PUT).
+ */
+#define DHT_TTL GNUNET_TIME_UNIT_HOURS
+
+/**
+ * Handle for an announced regex: everything needed to (re)publish the
+ * states of the regex' automaton in the DHT.
+ */
+struct GNUNET_REGEX_announce_handle
+{
+ /**
+ * DHT handle to use, must be initialized externally.
+ */
+ struct GNUNET_DHT_Handle *dht;
+
+ /**
+ * Regular expression. GNUNET_REGEX_announce stores the caller's pointer
+ * as-is (no copy is made).
+ */
+ const char *regex;
+
+ /**
+ * Automaton representation of the regex (expensive to build).
+ */
+ struct GNUNET_REGEX_Automaton* dfa;
+
+ /**
+ * Identity under which to announce the regex. Stored as a pointer and
+ * dereferenced each time an accepting state is published.
+ */
+ struct GNUNET_PeerIdentity *id;
+
+ /**
+ * Optional statistics handle to report usage. Can be NULL.
+ */
+ struct GNUNET_STATISTICS_Handle *stats;
+};
+
+
+/**
+ * Regex callback iterator to store own service description in the DHT.
+ *
+ * For every state of the automaton a GNUNET_BLOCK_TYPE_REGEX block is put
+ * under the state's key.  The block is laid out as:
+ *
+ *   struct RegexBlock | proof (n_proof bytes, not 0-terminated) |
+ *   num_edges x { struct RegexEdge | token (n_token bytes) }
+ *
+ * For accepting states an additional GNUNET_BLOCK_TYPE_REGEX_ACCEPT block
+ * carrying our own peer identity is put under the same key.
+ *
+ * @param cls closure (the struct GNUNET_REGEX_announce_handle).
+ * @param key hash for current state.
+ * @param proof proof for current state.
+ * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
+ * @param num_edges number of edges leaving current state.
+ * @param edges edges leaving current state.
+ */
+static void
+regex_iterator (void *cls,
+                const struct GNUNET_HashCode *key,
+                const char *proof,
+                int accepting,
+                unsigned int num_edges,
+                const struct GNUNET_REGEX_Edge *edges)
+{
+  struct GNUNET_REGEX_announce_handle *h = cls;
+  struct RegexBlock *block;
+  struct RegexEdge *block_edge;
+  enum GNUNET_DHT_RouteOption opt;
+  size_t size;
+  size_t len;
+  unsigned int i;
+  char *aux;
+
+  LOG (GNUNET_ERROR_TYPE_DEBUG,
+       " regex dht put for state %s\n",
+       GNUNET_h2s (key));
+  LOG (GNUNET_ERROR_TYPE_DEBUG, " proof: %s\n", proof);
+  LOG (GNUNET_ERROR_TYPE_DEBUG, " num edges: %u\n", num_edges);
+
+  opt = GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE;
+  if (GNUNET_YES == accepting)
+  {
+    /* Distinct name: do not shadow the state block built below. */
+    struct RegexAccept accept_block;
+
+    LOG (GNUNET_ERROR_TYPE_DEBUG,
+         " state %s is accepting, putting own id\n",
+         GNUNET_h2s(key));
+    accept_block.key = *key;
+    accept_block.id = *(h->id);
+    GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored",
+                              1, GNUNET_NO);
+    GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored",
+                              sizeof (accept_block), GNUNET_NO);
+    (void)
+    GNUNET_DHT_put (h->dht, key,
+                    2, /* FIXME option */
+                    opt /* | GNUNET_DHT_RO_RECORD_ROUTE*/,
+                    GNUNET_BLOCK_TYPE_REGEX_ACCEPT,
+                    sizeof (accept_block),
+                    (char *) &accept_block,
+                    GNUNET_TIME_relative_to_absolute (GNUNET_TIME_UNIT_HOURS), /* FIXME: expiration time should be option */
+                    GNUNET_TIME_UNIT_HOURS, /* FIXME option */
+                    NULL, NULL);
+  }
+
+  /* Compute the final size first so that the block is allocated exactly
+   * once instead of being reallocated for every edge. */
+  len = strlen (proof);
+  size = sizeof (struct RegexBlock) + len;
+  for (i = 0; i < num_edges; i++)
+    size += sizeof (struct RegexEdge) + strlen (edges[i].label);
+  block = GNUNET_malloc (size);
+
+  block->key = *key;
+  block->n_proof = htonl (len);
+  block->n_edges = htonl (num_edges);
+  block->accepting = htonl (accepting);
+
+  /* Store the proof right after the fixed-size header. */
+  aux = (char *) &block[1];
+  memcpy (aux, proof, len);
+  aux = &aux[len];
+
+  /* Append each edge as a variable length RegexEdge (header + token),
+   * one after the other, behind the proof. */
+  for (i = 0; i < num_edges; i++)
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, " edge %s towards %s\n",
+         edges[i].label, GNUNET_h2s(&edges[i].destination));
+
+    /* aux points just behind the previously written element */
+    len = strlen (edges[i].label);
+    block_edge = (struct RegexEdge *) aux;
+    block_edge->key = edges[i].destination;
+    block_edge->n_token = htonl (len);
+    aux = (char *) &block_edge[1];
+    memcpy (aux, edges[i].label, len);
+    aux = &aux[len];
+  }
+  (void)
+  GNUNET_DHT_put(h->dht, key,
+                 DHT_REPLICATION, /* FIXME OPTION */
+                 opt,
+                 GNUNET_BLOCK_TYPE_REGEX, size,
+                 (char *) block,
+                 GNUNET_TIME_relative_to_absolute (DHT_TTL), /* FIXME: this should be an option */
+                 DHT_TTL,
+                 NULL, NULL);
+  GNUNET_STATISTICS_update (h->stats, "# regex blocks stored",
+                            1, GNUNET_NO);
+  GNUNET_STATISTICS_update (h->stats, "# regex block bytes stored",
+                            size, GNUNET_NO);
+
+  GNUNET_free (block);
+}
+
+
+/**
+ * Build the automaton for a regular expression and announce all of its
+ * states in the DHT.
+ *
+ * The @a id and @a regex pointers are stored in the handle, not copied.
+ *
+ * @param dht DHT handle to use for the announcement, must not be NULL.
+ * @param id Identity under which to announce the regex.
+ * @param regex Regular expression to announce.
+ * @param compression Passed on to GNUNET_REGEX_construct_dfa.
+ * @param stats Optional statistics handle to report usage. Can be NULL.
+ *
+ * @return Handle for the announcement; release it with
+ *         GNUNET_REGEX_announce_cancel.
+ */
+struct GNUNET_REGEX_announce_handle *
+GNUNET_REGEX_announce (struct GNUNET_DHT_Handle *dht,
+                       struct GNUNET_PeerIdentity *id,
+                       const char *regex,
+                       uint16_t compression,
+                       struct GNUNET_STATISTICS_Handle *stats)
+{
+  struct GNUNET_REGEX_announce_handle *handle;
+
+  GNUNET_assert (NULL != dht);
+  handle = GNUNET_malloc (sizeof (*handle));
+  handle->dht = dht;
+  handle->id = id;
+  handle->stats = stats;
+  handle->regex = regex;
+  handle->dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex), compression);
+  /* The first announcement is simply a re-announcement of a fresh handle. */
+  GNUNET_REGEX_reannounce (handle);
+  return handle;
+}
+
+/**
+ * Announce again the regex of an existing handle: iterates over all edges
+ * of the already built automaton and hands every state to regex_iterator,
+ * which puts it into the DHT.
+ *
+ * @param h Handle returned by a previous GNUNET_REGEX_announce call.
+ */
+void
+GNUNET_REGEX_reannounce (struct GNUNET_REGEX_announce_handle *h)
+{
+ GNUNET_REGEX_iterate_all_edges (h->dfa, &regex_iterator, h);
+}
+
+/**
+ * Release an announce handle: destroys the automaton and frees the handle
+ * itself.  The DHT handle, statistics handle, identity and regex string
+ * referenced by the handle are not touched here.
+ *
+ * @param h Handle returned by a previous GNUNET_REGEX_announce call.
+ */
+void
+GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_announce_handle *h)
+{
+ GNUNET_REGEX_automaton_destroy (h->dfa);
+ GNUNET_free (h);
+}
+
+
+/******************************************************************************/
+
+
+/**
+ * Struct to keep state of running searches that have consumed a part of
+ * the initial string.
+ */
+struct RegexSearchContext
+{
+ /**
+ * Part of the description already consumed by
+ * this particular search branch (offset into the description string).
+ */
+ size_t position;
+
+ /**
+ * Information about the search.
+ */
+ struct GNUNET_REGEX_search_handle *info;
+
+ /**
+ * We just want to look for one edge, the longer the better.
+ * Keep its length.
+ */
+ unsigned int longest_match;
+
+ /**
+ * Destination hash of the longest match.
+ */
+ struct GNUNET_HashCode hash;
+};
+
+
+/**
+ * Struct to keep information of searches of services described by a regex
+ * using a user-provided string service description.
+ */
+struct GNUNET_REGEX_search_handle
+{
+ /**
+ * DHT handle to use, must be initialized externally.
+ */
+ struct GNUNET_DHT_Handle *dht;
+
+ /**
+ * Optional statistics handle to report usage. Can be NULL.
+ */
+ struct GNUNET_STATISTICS_Handle *stats;
+
+ /**
+ * User provided description of the searched service (own copy, freed
+ * when the search is cancelled).
+ */
+ char *description;
+
+ /**
+ * Running DHT GETs, indexed by the key being looked up.
+ */
+ struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles;
+
+ /**
+ * Results from running DHT GETs (heap copies of the received blocks).
+ */
+ struct GNUNET_CONTAINER_MultiHashMap *dht_get_results;
+
+ /**
+ * Contexts, for each running DHT GET. Free all on end of search.
+ */
+ struct RegexSearchContext **contexts;
+
+ /**
+ * Number of contexts (branches/steps in search).
+ */
+ unsigned int n_contexts;
+
+ /**
+ * Callback for found peers.
+ */
+ GNUNET_REGEX_Found callback;
+
+ /**
+ * Closure for @c callback.
+ */
+ void *callback_cls;
+};
+
+
+
+/**
+ * Jump to the next edge, with the longest matching token.
+ * (Forward declaration; the DHT result handlers defined before the
+ * implementation call it.)
+ *
+ * @param block Block found in the DHT.
+ * @param size Size of the block.
+ * @param ctx Context of the search.
+ */
+static void
+regex_next_edge (const struct RegexBlock *block,
+ size_t size,
+ struct RegexSearchContext *ctx);
+
+
+/**
+ * Function to process the DHT results for an accepting state.
+ * Called on each GNUNET_BLOCK_TYPE_REGEX_ACCEPT result obtained for the
+ * GET started by regex_find_path; reports the peer found in the block to
+ * the callback of the search.
+ *
+ * @param cls Closure (search context).
+ * @param exp When will this value expire.
+ * @param key Key of the result.
+ * @param get_path Path of the get request.
+ * @param get_path_length Length of get_path.
+ * @param put_path Path of the put request.
+ * @param put_path_length Length of the put_path.
+ * @param type Type of the result.
+ * @param size Number of bytes in data.
+ * @param data Pointer to the result data.
+ */
+static void
+dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp,
+                               const struct GNUNET_HashCode * key,
+                               const struct GNUNET_PeerIdentity *get_path,
+                               unsigned int get_path_length,
+                               const struct GNUNET_PeerIdentity *put_path,
+                               unsigned int put_path_length,
+                               enum GNUNET_BLOCK_Type type,
+                               size_t size, const void *data)
+{
+  const struct RegexAccept *block = data;
+  struct RegexSearchContext *ctx = cls;
+  struct GNUNET_REGEX_search_handle *info = ctx->info;
+
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "Got regex results from DHT!\n");
+  LOG (GNUNET_ERROR_TYPE_DEBUG, " for %s\n", info->description);
+
+  /* The data comes from the network: make sure it is large enough to hold
+   * the identity we are about to read. */
+  if (size < sizeof (struct RegexAccept))
+  {
+    GNUNET_break_op (0);
+    return;
+  }
+
+  GNUNET_STATISTICS_update (info->stats, "# regex accepting blocks found",
+                            1, GNUNET_NO);
+  GNUNET_STATISTICS_update (info->stats, "# regex accepting block bytes found",
+                            size, GNUNET_NO);
+
+  info->callback (info->callback_cls,
+                  &block->id,
+                  get_path, get_path_length,
+                  put_path, put_path_length);
+}
+
+/**
+ * Find a path to a peer that offers a regex service compatible
+ * with a given string: starts a DHT GET for the accept block stored under
+ * the key of the accepting state and remembers the GET handle.
+ *
+ * @param key The key of the accepting state.
+ * @param ctx Context containing info about the string, tunnel, etc.
+ */
+static void
+regex_find_path (const struct GNUNET_HashCode *key,
+                 struct RegexSearchContext *ctx)
+{
+  struct GNUNET_REGEX_search_handle *search = ctx->info;
+  struct GNUNET_DHT_GetHandle *accept_get;
+  int stored;
+
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "Found peer by service\n");
+  accept_get =
+    GNUNET_DHT_get_start (search->dht,                    /* handle */
+                          GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */
+                          key,                            /* key to search */
+                          DHT_REPLICATION,                /* replication level */
+                          GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE |
+                          GNUNET_DHT_RO_RECORD_ROUTE,
+                          NULL,       /* xquery */ // FIXME BLOOMFILTER
+                          0,          /* xquery bits */ // FIXME BLOOMFILTER SIZE
+                          &dht_get_string_accept_handler, ctx);
+  /* Keep the handle so the GET can be stopped when the search ends. */
+  stored =
+    GNUNET_CONTAINER_multihashmap_put (search->dht_get_handles,
+                                       key,
+                                       accept_get,
+                                       GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE);
+  GNUNET_break (GNUNET_OK == stored);
+}
+
+
+/**
+ * Function to process DHT string to regex matching.
+ * Called on each result obtained for the DHT search.
+ *
+ * The block is validated (size, proof length, proof) before anything else
+ * is done with it; only valid blocks are cached in the result map.
+ *
+ * @param cls closure (search context)
+ * @param exp when will this value expire
+ * @param key key of the result
+ * @param get_path path of the get request (not used)
+ * @param get_path_length length of get_path (not used)
+ * @param put_path path of the put request (not used)
+ * @param put_path_length length of the put_path (not used)
+ * @param type type of the result
+ * @param size number of bytes in data
+ * @param data pointer to the result data
+ *
+ * TODO: re-issue the request after certain time? cancel after X results?
+ */
+static void
+dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp,
+                        const struct GNUNET_HashCode * key,
+                        const struct GNUNET_PeerIdentity *get_path,
+                        unsigned int get_path_length,
+                        const struct GNUNET_PeerIdentity *put_path,
+                        unsigned int put_path_length,
+                        enum GNUNET_BLOCK_Type type,
+                        size_t size, const void *data)
+{
+  const struct RegexBlock *block = data;
+  struct RegexSearchContext *ctx = cls;
+  struct GNUNET_REGEX_search_handle *info = ctx->info;
+  void *copy;
+  char *proof;
+  size_t len;
+  int proof_ok;
+
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "DHT GET STRING RETURNED RESULTS\n");
+  LOG (GNUNET_ERROR_TYPE_DEBUG, " for: %s\n", ctx->info->description);
+  LOG (GNUNET_ERROR_TYPE_DEBUG, " key: %s\n", GNUNET_h2s (key));
+
+  /* The block comes from the network: never trust the lengths it claims
+   * before checking them against the number of bytes actually received. */
+  if (size < sizeof (struct RegexBlock))
+  {
+    GNUNET_break_op (0);
+    return;
+  }
+  len = ntohl (block->n_proof);
+  if (len > size - sizeof (struct RegexBlock))
+  {
+    GNUNET_break_op (0);
+    return;
+  }
+
+  /* The proof is not 0-terminated inside the block; build a C string on
+   * the heap (its length is attacker-influenced, so no stack array). */
+  proof = GNUNET_malloc (len + 1);
+  memcpy (proof, &block[1], len);
+  proof[len] = '\0';
+  proof_ok = GNUNET_REGEX_check_proof (proof, key);
+  GNUNET_free (proof);
+  if (GNUNET_OK != proof_ok)
+  {
+    GNUNET_break_op (0);
+    return;
+  }
+
+  /* Only now cache the (validated) block for later search branches. */
+  copy = GNUNET_malloc (size);
+  memcpy (copy, data, size);
+  GNUNET_break (
+    GNUNET_OK ==
+    GNUNET_CONTAINER_multihashmap_put (info->dht_get_results, key, copy,
+                                       GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)
+  );
+
+  len = strlen (info->description);
+  if (len == ctx->position) // String processed
+  {
+    if (GNUNET_YES == ntohl (block->accepting))
+    {
+      regex_find_path (key, ctx);
+    }
+    else
+    {
+      LOG (GNUNET_ERROR_TYPE_DEBUG, " block not accepting!\n");
+      // FIXME REGEX this block not successful, wait for more? start timeout?
+    }
+    return;
+  }
+
+  regex_next_edge (block, size, ctx);
+}
+
+
+/**
+ * Iterator over found existing regex blocks that match an ongoing search.
+ *
+ * If the block is accepting and the whole description has been consumed,
+ * a lookup for the accepting peer is started; otherwise the search goes on
+ * with the edges of the block.
+ *
+ * @param cls closure (search context)
+ * @param key current key code
+ * @param value value in the hash map (cached struct RegexBlock)
+ * @return GNUNET_YES if we should continue to iterate,
+ *         GNUNET_NO if not (this iterator always returns GNUNET_YES).
+ */
+static int
+regex_result_iterator (void *cls,
+                       const struct GNUNET_HashCode * key,
+                       void *value)
+{
+  struct RegexBlock *block = value;
+  struct RegexSearchContext *ctx = cls;
+
+  if (GNUNET_YES == ntohl(block->accepting) &&
+      ctx->position == strlen (ctx->info->description))
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* Found accepting known block\n");
+    regex_find_path (key, ctx);
+    return GNUNET_YES; // We found an accept state!
+  }
+  else
+  {
+    /* size_t values must be converted explicitly to match "%u" */
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* %u, %u, [%u]\n",
+         (unsigned int) ctx->position,
+         (unsigned int) strlen (ctx->info->description),
+         (unsigned int) ntohl (block->accepting));
+
+  }
+  /* The size of a cached block is not kept, hence SIZE_MAX. */
+  regex_next_edge(block, SIZE_MAX, ctx);
+
+  GNUNET_STATISTICS_update (ctx->info->stats, "# regex mesh blocks iterated",
+                            1, GNUNET_NO);
+
+  return GNUNET_YES;
+}
+
+
+/**
+ * Iterator over edges in a regex block retrieved from the DHT.
+ *
+ * Remembers in the context the longest token that is a prefix of the
+ * not yet consumed part of the description, together with the key of the
+ * state that token leads to.
+ *
+ * @param cls Closure (context of the search).
+ * @param token Token that follows to next state.
+ * @param len Length of token.
+ * @param key Hash of next state.
+ *
+ * @return GNUNET_YES if should keep iterating, GNUNET_NO otherwise
+ *         (this iterator always returns GNUNET_YES).
+ */
+static int
+regex_edge_iterator (void *cls,
+                     const char *token,
+                     size_t len,
+                     const struct GNUNET_HashCode *key)
+{
+  struct RegexSearchContext *ctx = cls;
+  struct GNUNET_REGEX_search_handle *info = ctx->info;
+  const char *current;
+  size_t current_len;
+
+  GNUNET_STATISTICS_update (info->stats, "# regex edges iterated",
+                            1, GNUNET_NO);
+
+  /* size_t arguments are converted explicitly: "%u" expects unsigned int
+   * and the precision of "%.*s" expects int. */
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* Start of regex edge iterator\n");
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* descr : %s\n", info->description);
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* posit : %u\n",
+       (unsigned int) ctx->position);
+  current = &info->description[ctx->position];
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* currt : %s\n", current);
+  current_len = strlen (info->description) - ctx->position;
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* ctlen : %u\n",
+       (unsigned int) current_len);
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* tklen : %u\n", (unsigned int) len);
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* token : %.*s\n", (int) len, token);
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* nextk : %s\n", GNUNET_h2s(key));
+  if (len > current_len)
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token too long, END\n");
+    return GNUNET_YES; // Token too long, wont match
+  }
+  if (0 != strncmp (current, token, len))
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token doesn't match, END\n");
+    return GNUNET_YES; // Token doesn't match
+  }
+
+  if (len > ctx->longest_match)
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is longer, KEEP\n");
+    ctx->longest_match = len;
+    ctx->hash = *key;
+  }
+  else
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* Token is not longer, IGNORE\n");
+  }
+
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n");
+  return GNUNET_YES;
+}
+
+
+/**
+ * Jump to the next edge, with the longest matching token.
+ *
+ * Picks, among the edges of @a block, the longest token matching the
+ * description at the current position and continues the search at the
+ * state that edge leads to: either by walking the results already cached
+ * for that state (if a GET for it is running) or by starting a new GET.
+ *
+ * @param block Block found in the DHT.
+ * @param size Size of the block.
+ * @param ctx Context of the search.
+ */
+static void
+regex_next_edge (const struct RegexBlock *block,
+                 size_t size,
+                 struct RegexSearchContext *ctx)
+{
+  struct GNUNET_REGEX_search_handle *search = ctx->info;
+  struct RegexSearchContext *branch;
+  struct GNUNET_DHT_GetHandle *state_get;
+  const char *remaining;
+
+  /* Find the longest match for the current string position,
+   * among tokens in the given block */
+  ctx->longest_match = 0;
+  GNUNET_break (GNUNET_OK ==
+                GNUNET_REGEX_block_iterate (block, size,
+                                            &regex_edge_iterator, ctx));
+
+  /* Nothing matched: this branch of the search ends here. */
+  if (0 == ctx->longest_match)
+    return;
+
+  /* New branch, positioned just behind the matched token. */
+  branch = GNUNET_malloc (sizeof (struct RegexSearchContext));
+  branch->info = search;
+  branch->position = ctx->position + ctx->longest_match;
+  GNUNET_array_append (search->contexts, search->n_contexts, branch);
+
+  /* Check whether we already have a DHT GET running for it */
+  if (GNUNET_YES ==
+      GNUNET_CONTAINER_multihashmap_contains (search->dht_get_handles,
+                                              &ctx->hash))
+  {
+    LOG (GNUNET_ERROR_TYPE_DEBUG, "* GET running, END\n");
+    GNUNET_CONTAINER_multihashmap_get_multiple (search->dht_get_results,
+                                                &ctx->hash,
+                                                &regex_result_iterator,
+                                                branch);
+    // FIXME: "leaks" new_ctx? avoid keeping it around?
+    return; // We are already looking for it
+  }
+
+  GNUNET_STATISTICS_update (search->stats, "# regex nodes traversed",
+                            1, GNUNET_NO);
+
+  /* Start search in DHT */
+  remaining = &search->description[branch->position];
+  state_get =
+    GNUNET_DHT_get_start (search->dht,             /* handle */
+                          GNUNET_BLOCK_TYPE_REGEX, /* type */
+                          &ctx->hash,              /* key to search */
+                          DHT_REPLICATION,         /* replication level */
+                          GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE,
+                          remaining,               /* xquery */
+                          // FIXME add BLOOMFILTER to exclude filtered peers
+                          strlen (remaining) + 1,  /* xquery bits */
+                          // FIXME add BLOOMFILTER SIZE
+                          &dht_get_string_handler, branch);
+  GNUNET_break (GNUNET_OK ==
+                GNUNET_CONTAINER_multihashmap_put (search->dht_get_handles,
+                                                   &ctx->hash,
+                                                   state_get,
+                                                   GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST));
+}
+
+
+/**
+ * Search for a peer offering a regex matching a certain string in the DHT.
+ * Every peer found is reported through @a callback; the search runs until
+ * it is stopped with GNUNET_REGEX_search_cancel.
+ *
+ * @param dht DHT handle to use, must not be NULL.
+ * @param string String to match against the regexes in the DHT (copied).
+ * @param callback Callback for found peers, must not be NULL.
+ * @param callback_cls Closure for @c callback.
+ * @param stats Optional statistics handle to report usage. Can be NULL.
+ *
+ * @return Handle to stop the search and free its resources.
+ */
+struct GNUNET_REGEX_search_handle *
+GNUNET_REGEX_search (struct GNUNET_DHT_Handle *dht,
+                     const char *string,
+                     GNUNET_REGEX_Found callback,
+                     void *callback_cls,
+                     struct GNUNET_STATISTICS_Handle *stats)
+{
+  struct GNUNET_REGEX_search_handle *h;
+  struct GNUNET_DHT_GetHandle *get_h;
+  struct RegexSearchContext *ctx;
+  struct GNUNET_HashCode key;
+  size_t size;
+  size_t len;
+
+  /* Initialize handle */
+  LOG (GNUNET_ERROR_TYPE_DEBUG, "GNUNET_REGEX_search: %s\n", string);
+  GNUNET_assert (NULL != dht);
+  GNUNET_assert (NULL != callback);
+  h = GNUNET_malloc (sizeof (struct GNUNET_REGEX_search_handle));
+  h->dht = dht;
+  h->description = GNUNET_strdup (string);
+  h->callback = callback;
+  h->callback_cls = callback_cls;
+  h->stats = stats;
+  h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
+  h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_YES);
+
+  /* Initialize context: the first key already consumes 'size' characters
+   * of the string, the search continues behind them. */
+  len = strlen (string);
+  size = GNUNET_REGEX_get_first_key (string, len, &key);
+  ctx = GNUNET_malloc (sizeof (struct RegexSearchContext));
+  ctx->position = size;
+  ctx->info = h;
+  GNUNET_array_append (h->contexts, h->n_contexts, ctx);
+  /* size_t values are converted explicitly to match "%u" */
+  LOG (GNUNET_ERROR_TYPE_DEBUG,
+       " consumed %u bits out of %u\n",
+       (unsigned int) size, (unsigned int) len);
+  LOG (GNUNET_ERROR_TYPE_DEBUG,
+       " looking for %s\n", GNUNET_h2s (&key));
+
+  /* Start search in DHT */
+  get_h = GNUNET_DHT_get_start (h->dht,    /* handle */
+                                GNUNET_BLOCK_TYPE_REGEX, /* type */
+                                &key,     /* key to search */
+                                DHT_REPLICATION, /* replication level */
+                                GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE,
+                                &h->description[size],           /* xquery */
+                                // FIXME add BLOOMFILTER to exclude filtered peers
+                                len + 1 - size,                /* xquery bits */
+                                // FIXME add BLOOMFILTER SIZE
+                                &dht_get_string_handler, ctx);
+  GNUNET_break (
+    GNUNET_OK ==
+    GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles,
+                                       &key,
+                                       get_h,
+                                       GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)
+  );
+
+  return h;
+}
+
+
+/**
+ * Iterator over hash map entries to cancel DHT GET requests when a
+ * search is cancelled.
+ *
+ * @param cls Closure (unused).
+ * @param key Current key code (unused).
+ * @param value Value in the hash map (get handle).
+ * @return GNUNET_YES if we should continue to iterate,
+ * GNUNET_NO if not (always GNUNET_YES here, so every GET is stopped).
+ */
+static int
+regex_cancel_dht_get (void *cls,
+ const struct GNUNET_HashCode * key,
+ void *value)
+{
+ struct GNUNET_DHT_GetHandle *h = value;
+
+ GNUNET_DHT_get_stop (h);
+ return GNUNET_YES;
+}
+
+
+/**
+ * Iterator over hash map entries to free the regex blocks stored during
+ * a search (the heap copies made of received DHT results).
+ *
+ * @param cls Closure (unused).
+ * @param key Current key code (unused).
+ * @param value Stored block copy in the hash map.
+ * @return GNUNET_YES if we should continue to iterate,
+ * GNUNET_NO if not (always GNUNET_YES here, so every entry is freed).
+ */
+static int
+regex_free_result (void *cls,
+ const struct GNUNET_HashCode * key,
+ void *value)
+{
+
+ GNUNET_free (value);
+ return GNUNET_YES;
+}
+
+
+/**
+ * Cancel an ongoing regex search in the DHT and free all resources held
+ * by the search handle: the description, all running DHT GETs, all cached
+ * results and all search contexts.  The handle structure itself is NOT
+ * freed here.
+ *
+ * @param ctx The search handle.
+ */
+static void
+regex_cancel_search (struct GNUNET_REGEX_search_handle *ctx)
+{
+  GNUNET_free (ctx->description);
+  GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_handles,
+                                         &regex_cancel_dht_get, NULL);
+  GNUNET_CONTAINER_multihashmap_iterate (ctx->dht_get_results,
+                                         &regex_free_result, NULL);
+  GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_results);
+  GNUNET_CONTAINER_multihashmap_destroy (ctx->dht_get_handles);
+  if (0 < ctx->n_contexts)
+  {
+    /* same type as n_contexts: no signed/unsigned comparison */
+    unsigned int i;
+
+    for (i = 0; i < ctx->n_contexts; i++)
+    {
+      GNUNET_free (ctx->contexts[i]);
+    }
+    GNUNET_free (ctx->contexts);
+  }
+}
+
+/**
+ * Stop a search: releases everything the handle owns via
+ * regex_cancel_search and then frees the handle itself.
+ *
+ * @param h Handle returned by a previous GNUNET_REGEX_search call.
+ */
+void
+GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_search_handle *h)
+{
+ regex_cancel_search (h);
+ GNUNET_free (h);
+}
+
+
+
+/* end of regex_dht.c */ \ No newline at end of file
diff --git a/src/regex/regex_graph.c b/src/regex/regex_graph.c
new file mode 100644
index 0000000..0b5c571
--- /dev/null
+++ b/src/regex/regex_graph.c
@@ -0,0 +1,317 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_graph.c
+ * @brief functions for creating .dot graphs from regexes
+ * @author Maximilian Szengel
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
+
+/**
+ * Context for graph creation. Passed as the cls to
+ * GNUNET_REGEX_automaton_save_graph_step; filled in by
+ * GNUNET_REGEX_automaton_save_graph from its options argument.
+ */
+struct GNUNET_REGEX_Graph_Context
+{
+ /**
+ * File pointer to the dot file used for output.
+ */
+ FILE *filep;
+
+ /**
+ * Verbose flag, if it's set to GNUNET_YES additional info will be printed in
+ * the graph (state name, proof and hash in addition to the DFS id).
+ */
+ int verbose;
+
+ /**
+ * Coloring flag, if set to GNUNET_YES SCCs will be colored.
+ */
+ int coloring;
+};
+
+
+/**
+ * Recursive DFS step of the SCC detection, starting at 'v'.  Numbers every
+ * state reachable from 'v' that has not been visited yet and, whenever a
+ * state turns out to be the root of a component, pops that component off
+ * the stack and tags its members with a fresh scc_id.  Driven by
+ * scc_tarjan, which calls it for every still unvisited state.
+ *
+ * @param scc_counter counter for numbering the sccs
+ * @param v start vertex
+ * @param index current index
+ * @param stack stack for saving all SCCs
+ * @param stack_size current size of the stack
+ */
+static void
+scc_tarjan_strongconnect (unsigned int *scc_counter,
+                          struct GNUNET_REGEX_State *v, unsigned int *index,
+                          struct GNUNET_REGEX_State **stack,
+                          unsigned int *stack_size)
+{
+  struct GNUNET_REGEX_Transition *edge;
+  struct GNUNET_REGEX_State *succ;
+  struct GNUNET_REGEX_State *member;
+
+  /* Number the state and push it on the stack. */
+  v->index = *index;
+  v->lowlink = *index;
+  *index += 1;
+  stack[*stack_size] = v;
+  *stack_size += 1;
+  v->contained = 1;
+
+  for (edge = v->transitions_head; NULL != edge; edge = edge->next)
+  {
+    succ = edge->to_state;
+    if (NULL == succ)
+      continue;
+
+    if (succ->index < 0)
+    {
+      /* Successor not visited yet: descend, then inherit its lowlink. */
+      scc_tarjan_strongconnect (scc_counter, succ, index, stack, stack_size);
+      if (succ->lowlink < v->lowlink)
+        v->lowlink = succ->lowlink;
+    }
+    else if (1 == succ->contained)
+    {
+      /* Successor is on the stack, hence part of the current component. */
+      if (succ->index < v->lowlink)
+        v->lowlink = succ->index;
+    }
+  }
+
+  /* Only the root of a component pops it off the stack. */
+  if (v->lowlink != v->index)
+    return;
+
+  *scc_counter += 1;
+  do
+  {
+    *stack_size -= 1;
+    member = stack[*stack_size];
+    member->contained = 0;
+    member->scc_id = *scc_counter;
+  }
+  while (member != v);
+}
+
+
+/**
+ * Detect all SCCs (Strongly Connected Components) inside the given automaton.
+ * SCCs will be marked using the scc_id on each state.
+ *
+ * The work stack holds at most one entry per state; it is allocated on the
+ * heap because the number of states is not bounded (a variable length
+ * array could overflow the C stack and is undefined for zero states).
+ *
+ * @param a the automaton for which SCCs should be computed and assigned.
+ */
+static void
+scc_tarjan (struct GNUNET_REGEX_Automaton *a)
+{
+  unsigned int index;
+  unsigned int scc_counter;
+  struct GNUNET_REGEX_State *v;
+  struct GNUNET_REGEX_State **stack;
+  unsigned int stack_size;
+
+  /* Mark every state as not visited and not on the stack. */
+  for (v = a->states_head; NULL != v; v = v->next)
+  {
+    v->contained = 0;
+    v->index = -1;
+    v->lowlink = -1;
+  }
+
+  if (0 == a->state_count)
+    return; /* nothing to do, and nothing to allocate */
+
+  stack = GNUNET_malloc (a->state_count * sizeof (struct GNUNET_REGEX_State *));
+  stack_size = 0;
+  index = 0;
+  scc_counter = 0;
+
+  for (v = a->states_head; NULL != v; v = v->next)
+  {
+    if (v->index < 0)
+      scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size);
+  }
+
+  GNUNET_free (stack);
+}
+
+
+/**
+ * Write one state and all of its outgoing transitions in dot syntax.
+ * Used only in conjunction with GNUNET_REGEX_automaton_save_graph, as the
+ * per-state action of the automaton traversal.
+ *
+ * @param cls graph context (struct GNUNET_REGEX_Graph_Context) holding the
+ *        open output file and the verbose/coloring flags.
+ * @param count current count of the state, not used.
+ * @param s state.
+ */
+void
+GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count,
+                                        struct GNUNET_REGEX_State *s)
+{
+  struct GNUNET_REGEX_Graph_Context *ctx = cls;
+  struct GNUNET_REGEX_Transition *ctran;
+  char *s_acc = NULL;
+  char *s_tran = NULL;
+  char *name;
+  char *to_name;
+
+  /* Node name: only the DFS id, or id plus name, proof and hash. */
+  if (GNUNET_YES == ctx->verbose)
+    GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->dfs_id, s->name, s->proof,
+                     GNUNET_h2s (&s->hash));
+  else
+    GNUNET_asprintf (&name, "%i", s->dfs_id);
+
+  /* Node line: accepting states are drawn as double circles; the color
+   * (if requested) is derived from the id of the state's SCC. */
+  if (s->accepting)
+  {
+    if (GNUNET_YES == ctx->coloring)
+    {
+      GNUNET_asprintf (&s_acc,
+                       "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
+                       name, s->scc_id * s->scc_id);
+    }
+    else
+    {
+      GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", name);
+    }
+  }
+  else if (GNUNET_YES == ctx->coloring)
+  {
+    GNUNET_asprintf (&s_acc,
+                     "\"%s\" [shape=circle, color=\"0.%i 0.8 0.95\"];\n", name,
+                     s->scc_id * s->scc_id);
+  }
+  else
+  {
+    GNUNET_asprintf (&s_acc, "\"%s\" [shape=circle];\n", name);
+  }
+
+  GNUNET_assert (NULL != s_acc);
+
+  fwrite (s_acc, strlen (s_acc), 1, ctx->filep);
+  GNUNET_free (s_acc);
+  s_acc = NULL;
+
+  /* One edge line per transition leaving the state. */
+  for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
+  {
+    if (NULL == ctran->to_state)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Transition from State %i has no state for transitioning\n",
+                  s->id);
+      continue;
+    }
+
+    if (GNUNET_YES == ctx->verbose)
+    {
+      GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->dfs_id,
+                       ctran->to_state->name, ctran->to_state->proof,
+                       GNUNET_h2s (&ctran->to_state->hash));
+    }
+    else
+      GNUNET_asprintf (&to_name, "%i", ctran->to_state->dfs_id);
+
+    if (NULL == ctran->label)
+    {
+      /* No label: epsilon transition */
+      if (GNUNET_YES == ctx->coloring)
+      {
+        GNUNET_asprintf (&s_tran,
+                         "\"%s\" -> \"%s\" [label = \"ε\", color=\"0.%i 0.8 0.95\"];\n",
+                         name, to_name, s->scc_id * s->scc_id);
+      }
+      else
+      {
+        GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"ε\"];\n", name,
+                         to_name);
+      }
+    }
+    else
+    {
+      if (GNUNET_YES == ctx->coloring)
+      {
+        GNUNET_asprintf (&s_tran,
+                         "\"%s\" -> \"%s\" [label = \"%s\", color=\"0.%i 0.8 0.95\"];\n",
+                         name, to_name, ctran->label, s->scc_id * s->scc_id);
+      }
+      else
+      {
+        GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%s\"];\n", name,
+                         to_name, ctran->label);
+      }
+    }
+
+    GNUNET_free (to_name);
+
+    GNUNET_assert (NULL != s_tran);
+
+    fwrite (s_tran, strlen (s_tran), 1, ctx->filep);
+    GNUNET_free (s_tran);
+    s_tran = NULL;
+  }
+
+  GNUNET_free (name);
+}
+
+
+/**
+ * Save the given automaton as a GraphViz dot file.
+ *
+ * Logs an error and writes nothing if the automaton is NULL, the filename
+ * is NULL or empty, or the file cannot be opened for writing.
+ *
+ * @param a the automaton to be saved.
+ * @param filename where to save the file.
+ * @param options options for graph generation that include coloring or verbose
+ *                mode
+ */
+void
+GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
+                                   const char *filename,
+                                   enum GNUNET_REGEX_GraphSavingOptions options)
+{
+  const char *header = "digraph G {\nrankdir=LR\n";
+  const char *footer = "\n}\n";
+  struct GNUNET_REGEX_Graph_Context ctx;
+
+  if (NULL == a)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
+    return;
+  }
+
+  if ((NULL == filename) || ('\0' == filename[0]))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
+    return;
+  }
+
+  /* Translate the option bits into the flags of the graph context. */
+  ctx.verbose =
+      (0 != (options & GNUNET_REGEX_GRAPH_VERBOSE)) ? GNUNET_YES : GNUNET_NO;
+  ctx.coloring =
+      (0 != (options & GNUNET_REGEX_GRAPH_COLORING)) ? GNUNET_YES : GNUNET_NO;
+
+  ctx.filep = fopen (filename, "w");
+  if (NULL == ctx.filep)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
+                filename);
+    return;
+  }
+
+  /* First add the SCCs to the automaton, so we can color them nicely */
+  if (GNUNET_YES == ctx.coloring)
+    scc_tarjan (a);
+
+  fwrite (header, strlen (header), 1, ctx.filep);
+
+  /* One call of the step function per state, starting at the start state. */
+  GNUNET_REGEX_automaton_traverse (a, a->start, NULL, NULL,
+                                   &GNUNET_REGEX_automaton_save_graph_step,
+                                   &ctx);
+
+  fwrite (footer, strlen (footer), 1, ctx.filep);
+  fclose (ctx.filep);
+}
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
new file mode 100644
index 0000000..00badc5
--- /dev/null
+++ b/src/regex/regex_internal.h
@@ -0,0 +1,484 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_internal.h
+ * @brief common internal definitions for regex library.
+ * @author Maximilian Szengel
+ */
+#ifndef REGEX_INTERNAL_H
+#define REGEX_INTERNAL_H
+
+#include "gnunet_regex_lib.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#if 0 /* keep Emacsens' auto-indent happy */
+}
+#endif
+#endif
+
+/**
+ * char array of literals that are allowed inside a regex (apart from the
+ * operators)
+ */
+#define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+
+/**
+ * Transition between two states. Transitions are stored at the states from
+ * which they originate ('from_state'). Each state can have 0-n transitions.
+ * If label is NULL, this is considered to be an epsilon transition.
+ */
+struct GNUNET_REGEX_Transition
+{
+ /**
+ * This is a linked list.
+ */
+ struct GNUNET_REGEX_Transition *prev;
+
+ /**
+ * This is a linked list.
+ */
+ struct GNUNET_REGEX_Transition *next;
+
+ /**
+ * Unique id of this transition.
+ */
+ unsigned int id;
+
+ /**
+ * Label for this transition. This is basically the edge label for the graph.
+ * NULL for an epsilon transition.
+ */
+ char *label;
+
+ /**
+ * State to which this transition leads.
+ */
+ struct GNUNET_REGEX_State *to_state;
+
+ /**
+ * State from which this transition originates.
+ */
+ struct GNUNET_REGEX_State *from_state;
+};
+
+
+/**
+ * Forward declaration of a state (the full definition follows below). Can be
+ * used in DFA and NFA automatons.
+ */
+struct GNUNET_REGEX_State;
+
+
+/**
+ * Set of states, kept as an array of state pointers together with the
+ * number of entries in use and the allocated length of the array.
+ */
+struct GNUNET_REGEX_StateSet
+{
+ /**
+ * Array of states.
+ */
+ struct GNUNET_REGEX_State **states;
+
+ /**
+ * Number of entries in *use* in the 'states' array.
+ */
+ unsigned int off;
+
+ /**
+ * Length of the 'states' array.
+ */
+ unsigned int size;
+};
+
+
+/**
+ * A state. Can be used in DFA and NFA automatons.
+ */
+struct GNUNET_REGEX_State
+{
+ /**
+ * This is a linked list to keep states in an automaton.
+ */
+ struct GNUNET_REGEX_State *prev;
+
+ /**
+ * This is a linked list to keep states in an automaton.
+ */
+ struct GNUNET_REGEX_State *next;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL.
+ */
+ struct GNUNET_REGEX_State *prev_SS;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL.
+ */
+ struct GNUNET_REGEX_State *next_SS;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL Stack.
+ */
+ struct GNUNET_REGEX_State *prev_ST;
+
+ /**
+ * This is a multi DLL for StateSet_MDLL Stack.
+ */
+ struct GNUNET_REGEX_State *next_ST;
+
+ /**
+ * Unique state id.
+ */
+ unsigned int id;
+
+ /**
+ * Unique state id that is used for traversing the automaton. It is guaranteed
+ * to be > 0 and < state_count.
+ * NOTE(review): the traversal action callback documents its count as running
+ * from 0 to state_count - 1; confirm whether the lower bound here should
+ * read ">= 0".
+ */
+ unsigned int traversal_id;
+
+ /**
+ * If this is an accepting state or not.
+ */
+ int accepting;
+
+ /**
+ * Marking of the state. This is used for marking all visited states when
+ * traversing all states of an automaton and for cases where the state id
+ * cannot be used (dfa minimization).
+ */
+ int marked;
+
+ /**
+ * Marking the state as contained. This is used for checking, if the state is
+ * contained in a set in constant time.
+ */
+ int contained;
+
+ /**
+ * Marking the state as part of an SCC (Strongly Connected Component). All
+ * states with the same scc_id are part of the same SCC. scc_id is 0, if state
+ * is not a part of any SCC.
+ */
+ unsigned int scc_id;
+
+ /**
+ * Used for SCC detection.
+ */
+ int index;
+
+ /**
+ * Used for SCC detection.
+ */
+ int lowlink;
+
+ /**
+ * Human readable name of the state. Used for debugging and graph
+ * creation.
+ */
+ char *name;
+
+ /**
+ * Hash of the state.
+ */
+ struct GNUNET_HashCode hash;
+
+ /**
+ * Linear state ID acquired by depth-first-search. This ID should be used for
+ * storing information about the state in an array, because the 'id' of the
+ * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0
+ * and < 'state_count'.
+ * NOTE(review): as with 'traversal_id', confirm whether the lower bound is
+ * really exclusive.
+ */
+ unsigned int dfs_id;
+
+ /**
+ * Proof for this state.
+ */
+ char *proof;
+
+ /**
+ * Number of transitions from this state to other states.
+ */
+ unsigned int transition_count;
+
+ /**
+ * DLL of transitions.
+ */
+ struct GNUNET_REGEX_Transition *transitions_head;
+
+ /**
+ * DLL of transitions.
+ */
+ struct GNUNET_REGEX_Transition *transitions_tail;
+
+ /**
+ * Number of incoming transitions. Used for compressing DFA paths.
+ */
+ unsigned int incoming_transition_count;
+
+ /**
+ * Set of states on which this state is based on. Used when creating a DFA out
+ * of several NFA states.
+ */
+ struct GNUNET_REGEX_StateSet nfa_set;
+};
+
+
+/**
+ * Type of an automaton.
+ */
+enum GNUNET_REGEX_AutomatonType
+{
+ /* Nondeterministic finite automaton. */
+ NFA,
+ /* Deterministic finite automaton. */
+ DFA
+};
+
+
+/**
+ * Automaton representation. Holds the DLL of all states, the start (and, for
+ * partial NFAs, end) state, and the regex the automaton was built from.
+ */
+struct GNUNET_REGEX_Automaton
+{
+ /**
+ * Linked list of NFAs used for partial NFA creation.
+ */
+ struct GNUNET_REGEX_Automaton *prev;
+
+ /**
+ * Linked list of NFAs used for partial NFA creation.
+ */
+ struct GNUNET_REGEX_Automaton *next;
+
+ /**
+ * First state of the automaton. This is mainly used for constructing an NFA,
+ * where each NFA itself consists of one or more NFAs linked together.
+ */
+ struct GNUNET_REGEX_State *start;
+
+ /**
+ * End state of the partial NFA. This is undefined for DFAs.
+ */
+ struct GNUNET_REGEX_State *end;
+
+ /**
+ * Number of states in the automaton.
+ */
+ unsigned int state_count;
+
+ /**
+ * DLL of states.
+ */
+ struct GNUNET_REGEX_State *states_head;
+
+ /**
+ * DLL of states.
+ */
+ struct GNUNET_REGEX_State *states_tail;
+
+ /**
+ * Type of the automaton.
+ */
+ enum GNUNET_REGEX_AutomatonType type;
+
+ /**
+ * Regex
+ */
+ char *regex;
+
+ /**
+ * Canonical regex (result of RX->NFA->DFA->RX)
+ */
+ char *canonical_regex;
+
+ /**
+ * GNUNET_YES, if multi strides have been added to the Automaton.
+ */
+ int is_multistrided;
+};
+
+
+/**
+ * Construct an NFA by parsing the regex string of length 'len'.
+ *
+ * @param regex regular expression string.
+ * @param len length of the string.
+ *
+ * @return NFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
+ */
+struct GNUNET_REGEX_Automaton *
+GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
+
+
+/**
+ * Function that gets passed to automaton traversal and is called before each
+ * next traversal from state 's' using transition 't' to check if traversal
+ * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue.
+ *
+ * @param cls closure for the check.
+ * @param s current state in the traversal.
+ * @param t current transition from state 's' that will be used for the next
+ * step.
+ *
+ * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop.
+ */
+typedef int (*GNUNET_REGEX_traverse_check) (void *cls,
+ struct GNUNET_REGEX_State * s,
+ struct GNUNET_REGEX_Transition * t);
+
+
+/**
+ * Function that is called with each state, when traversing an automaton.
+ *
+ * @param cls closure.
+ * @param count current count of the state, from 0 to a->state_count -1.
+ * @param s state.
+ */
+typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
+ const unsigned int count,
+ struct GNUNET_REGEX_State * s);
+
+
+/**
+ * Traverses the given automaton using depth-first-search (DFS) from its start
+ * state, visiting all reachable states and calling 'action' on each one of
+ * them.
+ *
+ * @param a automaton to be traversed.
+ * @param start start state, pass a->start or NULL to traverse the whole automaton.
+ * @param check function that is checked before advancing on each transition
+ * in the DFS.
+ * @param check_cls closure for check.
+ * @param action action to be performed on each state.
+ * @param action_cls closure for action
+ */
+void
+GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
+ struct GNUNET_REGEX_State *start,
+ GNUNET_REGEX_traverse_check check,
+ void *check_cls,
+ GNUNET_REGEX_traverse_action action,
+ void *action_cls);
+
+/**
+ * Get the canonical regex of the given automaton.
+ * When constructing the automaton a proof is computed for each state,
+ * consisting of the regular expression leading to this state. A complete
+ * regex for the automaton can be computed by combining these proofs.
+ * As of now this function is only useful for testing.
+ *
+ * @param a automaton for which the canonical regex should be returned.
+ *
+ * @return canonical regex string.
+ */
+const char *
+GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
+
+
+/**
+ * Get the number of transitions that are contained in the given automaton.
+ *
+ * @param a automaton for which the number of transitions should be returned.
+ *
+ * @return number of transitions in the given automaton.
+ */
+unsigned int
+GNUNET_REGEX_get_transition_count (struct GNUNET_REGEX_Automaton *a);
+
+
+/**
+ * Context that contains an id counter for states and transitions as well as a
+ * DLL of automatons used as a stack for NFA construction.
+ */
+struct GNUNET_REGEX_Context
+{
+ /**
+ * Unique state id (counter).
+ */
+ unsigned int state_id;
+
+ /**
+ * Unique transition id (counter).
+ */
+ unsigned int transition_id;
+
+ /**
+ * DLL of GNUNET_REGEX_Automaton's used as a stack.
+ */
+ struct GNUNET_REGEX_Automaton *stack_head;
+
+ /**
+ * DLL of GNUNET_REGEX_Automaton's used as a stack.
+ */
+ struct GNUNET_REGEX_Automaton *stack_tail;
+};
+
+
+/**
+ * Adds multi-strided transitions to the given 'dfa'.
+ *
+ * @param regex_ctx regex context needed to add transitions to the automaton.
+ * @param dfa DFA to which the multi strided transitions should be added.
+ * @param stride_len length of the strides.
+ */
+void
+GNUNET_REGEX_dfa_add_multi_strides (struct GNUNET_REGEX_Context *regex_ctx,
+ struct GNUNET_REGEX_Automaton *dfa,
+ const unsigned int stride_len);
+
+
+/**
+ * Generate a (pseudo) random regular expression of length 'rx_length', as well
+ * as a (optional) string that will be matched by the generated regex. The
+ * returned regex needs to be freed.
+ *
+ * @param rx_length length of the random regex.
+ * @param matching_str (optional) pointer to a string that will contain a string
+ * that will be matched by the generated regex, if
+ * 'matching_str' pointer was not NULL. The caller must
+ * provide a buffer of at least rx_length+1 bytes.
+ *
+ * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
+ * needs to be freed, otherwise.
+ */
+char *
+GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
+
+
+/**
+ * Generate a random string of maximum length 'max_len' that only contains literals allowed
+ * in a regular expression. The string might be 0 chars long but is guaranteed
+ * to be shorter or equal to 'max_len'.
+ *
+ * @param max_len maximum length of the string that should be generated.
+ *
+ * @return random string that needs to be freed.
+ */
+char *
+GNUNET_REGEX_generate_random_string (size_t max_len);
+
+
+#if 0 /* keep Emacsens' auto-indent happy */
+{
+#endif
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/regex/regex_random.c b/src/regex/regex_random.c
new file mode 100644
index 0000000..eee0c73
--- /dev/null
+++ b/src/regex/regex_random.c
@@ -0,0 +1,170 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_random.c
+ * @brief functions for creating random regular expressions and strings
+ * @author Maximilian Szengel
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "gnunet_crypto_lib.h"
+#include "regex_internal.h"
+
+
+/**
+ * Pick one character, (pseudo) randomly, from the set of literals that are
+ * allowed inside a regular expression (ALLOWED_LITERALS).
+ *
+ * @return random valid literal
+ */
+char
+get_random_literal ()
+{
+  const char *literals = ALLOWED_LITERALS;
+  uint32_t literal_count = (uint32_t) strlen (literals);
+
+  return literals[GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
+                                            literal_count)];
+}
+
+
+/**
+ * Build a (pseudo) random regular expression of exactly 'rx_length'
+ * characters. Optionally, every literal (non-operator) character of the regex
+ * is also copied, in order, into 'matching_str'. The returned regex needs to
+ * be freed.
+ *
+ * @param rx_length length of the random regex.
+ * @param matching_str (optional) buffer that receives the literal characters
+ *                     of the generated regex as a 0-terminated string, if
+ *                     'matching_str' is not NULL. Make sure you allocated at
+ *                     least rx_length+1 bytes for this string.
+ *
+ * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
+ *         needs to be freed, otherwise.
+ */
+char *
+GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str)
+{
+  static const char operators[] = { '+', '*', '?', '|' };
+  char *regex;
+  char *match_pos;
+  unsigned int pos;
+  unsigned int prev_was_operator;
+
+  if (0 == rx_length)
+    return NULL;
+
+  match_pos = matching_str;
+  regex = GNUNET_malloc (rx_length + 1);
+  /* Pretend an operator came first, so the regex always starts with a literal */
+  prev_was_operator = 1;
+
+  for (pos = 0; pos < rx_length; pos++)
+  {
+    char c;
+    unsigned int coin;
+
+    coin = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2);
+    if ((0 == coin) && (! prev_was_operator))
+    {
+      uint32_t op;
+
+      prev_was_operator = 1;
+      op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4);
+      if (('|' == operators[op]) && (! (pos < rx_length - 1)))
+        c = get_random_literal ();    /* '|' cannot be at the end */
+      else
+        c = operators[op];
+    }
+    else
+    {
+      c = get_random_literal ();
+      prev_was_operator = 0;
+    }
+
+    /* Only literals become part of the companion string */
+    if ((NULL != match_pos) &&
+        ! (('+' == c) || ('*' == c) || ('?' == c) || ('|' == c)))
+      *match_pos++ = c;
+
+    regex[pos] = c;
+  }
+  regex[pos] = '\0';
+  if (NULL != match_pos)
+    *match_pos = '\0';
+
+  return regex;
+}
+
+/**
+ * Generate a random string of maximum length 'max_len' that only contains
+ * literals allowed in a regular expression. The string might be 0 chars long
+ * but is guaranteed to be shorter or equal to 'max_len'.
+ *
+ * @param max_len maximum length of the string that should be generated.
+ *
+ * @return random string that needs to be freed.
+ */
+char *
+GNUNET_REGEX_generate_random_string (size_t max_len)
+{
+  size_t length;
+  size_t pos;
+  char *result;
+
+  if (0 == max_len)
+    return GNUNET_strdup ("");
+
+  length =
+      (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len);
+  result = GNUNET_malloc (length + 1);
+  for (pos = 0; pos < length; pos++)
+    result[pos] = get_random_literal ();
+  result[length] = '\0';
+
+  return result;
+}
diff --git a/src/regex/regex_simulation_profiler_test.conf b/src/regex/regex_simulation_profiler_test.conf
new file mode 100644
index 0000000..9384aa2
--- /dev/null
+++ b/src/regex/regex_simulation_profiler_test.conf
@@ -0,0 +1,7 @@
+[regex-mysql]
+DATABASE = regex
+USER = gnunet
+PASSWORD =
+HOST = localhost
+PORT = 3306
+REGEX_PREFIX = GNVPN-0001-PAD
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c
new file mode 100644
index 0000000..7a11fe0
--- /dev/null
+++ b/src/regex/regex_test_lib.c
@@ -0,0 +1,291 @@
+/*
+ * This file is part of GNUnet
+ * (C) 2012 Christian Grothoff (and other contributing authors)
+ *
+ * GNUnet is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your
+ * option) any later version.
+ *
+ * GNUnet is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNUnet; see the file COPYING. If not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+/**
+ * @file src/regex/regex_test_lib.c
+ * @brief library to read regexes representing IP networks from a file
+ * and simplify them into one big regex, in order to run
+ * tests (regex performance, mesh profiler).
+ * @author Bartlomiej Polot
+ */
+
+#include "platform.h"
+#include "gnunet_util_lib.h"
+
+/**
+ * Node of the tree used to combine regexes by common prefix. Each node holds
+ * a string fragment and a DLL of child nodes; siblings are linked through
+ * 'next'/'prev'.
+ */
+struct RegexCombineCtx {
+ /* Next sibling in the parent's DLL of children. */
+ struct RegexCombineCtx *next;
+ /* Previous sibling in the parent's DLL of children. */
+ struct RegexCombineCtx *prev;
+
+ /* Head of the DLL of child nodes. */
+ struct RegexCombineCtx *head;
+ /* Tail of the DLL of child nodes. */
+ struct RegexCombineCtx *tail;
+
+ /* String fragment of this node; may be NULL (regex_combine checks for it). */
+ char *s;
+};
+
+
+/**
+ * Extract a string from all prefix-combined regexes.
+ *
+ * The result is the node's own fragment followed by a parenthesised,
+ * '|'-separated list of the strings of its children. A node without children
+ * yields just its fragment; a node with neither fragment nor children yields
+ * the empty string.
+ *
+ * @param ctx Context with 0 or more regexes.
+ *
+ * @return Regex that matches any of the added regexes (to be freed by the
+ *         caller).
+ */
+static char *
+regex_combine (struct RegexCombineCtx *ctx)
+{
+  struct RegexCombineCtx *child;
+  char *combined;
+  char *extended;
+  char *child_regex;
+  size_t last;
+
+  if (NULL == ctx->s)
+    combined = GNUNET_strdup ("(");
+  else
+    GNUNET_asprintf (&combined, "%s(", ctx->s);
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix: %s\n", combined);
+
+  child = ctx->head;
+  while (NULL != child)
+  {
+    child_regex = regex_combine (child);
+    GNUNET_asprintf (&extended, "%s%s|", combined, child_regex);
+    GNUNET_free_non_null (child_regex);
+    GNUNET_free_non_null (combined);
+    combined = extended;
+    child = child->next;
+  }
+
+  /* Only the opening parenthesis: no fragment and no children */
+  if (1 == strlen (combined))
+  {
+    GNUNET_free (combined);
+    return GNUNET_strdup ("");
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "pre-partial: %s\n", combined);
+  last = strlen (combined) - 1;
+  switch (combined[last])
+  {
+  case '|':
+    /* trailing separator of the last alternative closes the group */
+    combined[last] = ')';
+    break;
+  case '(':
+    /* no alternatives were added: drop the empty group */
+    combined[last] = '\0';
+    break;
+  default:
+    break;
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", combined);
+  return combined;
+}
+
+
+/**
+ * Add a single regex to a context, combining with existing regex by-prefix.
+ *
+ * If a child of 'ctx' starts with the same character as 'regex', that child
+ * is reduced to the single common character (its remainder is moved into a
+ * new grandchild if needed) and the rest of 'regex' is added below it.
+ * Otherwise 'regex' becomes a new child of 'ctx'.
+ *
+ * An empty 'regex' (reached when one regex is a prefix of, or equal to,
+ * another) is stored as at most one empty child, so that it is never matched
+ * against, or split like, a non-empty fragment.
+ *
+ * @param ctx Context with 0 or more regexes.
+ * @param regex Regex to add.
+ */
+static void
+regex_add (struct RegexCombineCtx *ctx, const char *regex)
+{
+  struct RegexCombineCtx *p;
+  const char *rest;
+
+  if ('\0' == regex[0])
+  {
+    /* Nothing to split off an empty string; an existing empty child already
+     * represents this alternative. */
+    for (p = ctx->head; NULL != p; p = p->next)
+      if ('\0' == p->s[0])
+        return;
+  }
+  else
+  {
+    rest = &regex[1];
+    for (p = ctx->head; NULL != p; p = p->next)
+    {
+      if (p->s[0] != regex[0])
+        continue;
+      if (1 == strlen (p->s))
+      {
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s);
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest);
+        regex_add (p, rest);
+      }
+      else
+      {
+        struct RegexCombineCtx *suffix;
+
+        /* p->s has at least two characters here: keep the first one in p
+         * and move the remainder into a new child */
+        suffix = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+        suffix->s = GNUNET_strdup (&p->s[1]);
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s);
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s);
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex);
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", suffix->s);
+        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest);
+        p->s[1] = '\0'; /* dont realloc */
+        GNUNET_CONTAINER_DLL_insert (p->head, p->tail, suffix);
+        regex_add (p, rest);
+      }
+      return;
+    }
+  }
+
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
+  /* the root node carries no fragment, never hand NULL to "%s" */
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n",
+              (NULL == ctx->s) ? "(root)" : ctx->s);
+  p = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+  p->s = GNUNET_strdup (regex);
+  GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p);
+}
+
+
+/**
+ * Free all resources used by the context node and all its children.
+ *
+ * @param ctx Context to free.
+ */
+static void
+regex_ctx_destroy (struct RegexCombineCtx *ctx)
+{
+  struct RegexCombineCtx *p;
+  struct RegexCombineCtx *next;
+
+  for (p = ctx->head; NULL != p; p = next)
+  {
+    /* remember the successor first, 'p' is gone after the recursive call */
+    next = p->next;
+    regex_ctx_destroy (p);
+  }
+  /* The root node created by GNUNET_REGEX_combine never gets a string
+   * assigned, so 's' may be NULL here. */
+  GNUNET_free_non_null (ctx->s);
+  GNUNET_free (ctx);
+}
+
+
+/**
+ * Return a prefix-combine regex that matches the same strings as
+ * any of the original regexes.
+ *
+ * WARNING: only useful for reading specific regexes for specific applications,
+ * namely the gnunet-regex-profiler / gnunet-regex-daemon.
+ * This function DOES NOT support arbitrary regex combining.
+ *
+ * @param regexes A NULL-terminated array of regexes.
+ *
+ * @return A string with a single regex that matches any of the original
+ *         regexes.
+ */
+char *
+GNUNET_REGEX_combine (char * const regexes[])
+{
+  struct RegexCombineCtx *root;
+  char *result;
+  unsigned int idx;
+
+  /* Build the prefix tree, one regex at a time */
+  root = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+  idx = 0;
+  while (NULL != regexes[idx])
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", idx, regexes[idx]);
+    regex_add (root, regexes[idx]);
+    idx++;
+  }
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
+
+  /* Flatten the tree into a single regex string, then release the tree */
+  result = regex_combine (root);
+  regex_ctx_destroy (root);
+
+  return result;
+}
+
+
+/**
+ * Read a set of regexes from a file, one per line and return them in an array
+ * suitable for GNUNET_REGEX_combine.
+ * The array must be free'd using GNUNET_REGEX_free_from_file.
+ *
+ * Regexes are separated by whitespace. A trailing "(0|1)*" is removed from
+ * each regex; a warning is logged for every regex that lacks this suffix
+ * (such a regex is returned unmodified).
+ *
+ * @param filename Name of the file containing the regexes.
+ *
+ * @return A newly allocated, NULL terminated array of regexes, or NULL if
+ *         the file could not be opened or read.
+ */
+char **
+GNUNET_REGEX_read_from_file (const char *filename)
+{
+  struct GNUNET_DISK_FileHandle *f;
+  unsigned int nr;
+  unsigned int offset;
+  off_t size;
+  size_t len;
+  int consumed;
+  char *buffer;
+  char *regex;
+  char **regexes;
+
+  f = GNUNET_DISK_file_open (filename,
+                             GNUNET_DISK_OPEN_READ,
+                             GNUNET_DISK_PERM_NONE);
+  if (NULL == f)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Can't open file %s for reading\n", filename);
+    return NULL;
+  }
+  if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Can't get size of file %s\n", filename);
+    GNUNET_DISK_file_close (f);
+    return NULL;
+  }
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+              "using file %s, size %llu\n",
+              filename, (unsigned long long) size);
+
+  buffer = GNUNET_malloc (size + 1);
+  if (size != GNUNET_DISK_file_read (f, buffer, size))
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Can't read file %s\n", filename);
+    GNUNET_DISK_file_close (f);
+    GNUNET_free (buffer);
+    return NULL;
+  }
+  GNUNET_DISK_file_close (f);
+  buffer[size] = '\0'; /* sscanf needs a 0-terminated string */
+
+  regexes = GNUNET_malloc (sizeof (char *));
+  nr = 1;
+  offset = 0;
+  while (offset < size)
+  {
+    /* a token can never be longer than what is left of the file */
+    regex = GNUNET_malloc (size - offset + 1);
+    consumed = 0;
+    /* "%n" reports everything consumed, including the leading whitespace
+     * skipped by "%s", so 'offset' always ends up right behind the token */
+    if (1 != sscanf (&buffer[offset], "%s%n", regex, &consumed))
+    {
+      /* only whitespace (or nothing) left */
+      GNUNET_free (regex);
+      break;
+    }
+    offset += consumed;
+    len = strlen (regex);
+    if (len < 6 || strncmp (&regex[len - 6], "(0|1)*", 6) != 0)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                  "%s (line %u) does not end in \"(0|1)*\"\n",
+                  regex, nr);
+    }
+    else
+    {
+      /* strip the suffix from the regex itself, not from the file buffer */
+      len -= 6;
+      regex[len] = '\0';
+    }
+    regex = GNUNET_realloc (regex, len + 1);
+    GNUNET_array_grow (regexes, nr, nr + 1);
+    regexes[nr - 2] = regex;
+    regexes[nr - 1] = NULL;
+  }
+  GNUNET_free (buffer);
+
+  return regexes;
+}
+
+
+/**
+ * Free all memory reserved for a set of regexes created by read_from_file.
+ *
+ * @param regexes NULL-terminated array of regexes. May be NULL (the value
+ *                GNUNET_REGEX_read_from_file returns on error), in which
+ *                case nothing is done.
+ */
+void
+GNUNET_REGEX_free_from_file (char **regexes)
+{
+  unsigned int i;
+
+  if (NULL == regexes)
+    return;
+  for (i = 0; regexes[i]; i++)
+    GNUNET_free (regexes[i]);
+  GNUNET_free (regexes);
+}
+
+/* end of regex_test_lib.c */ \ No newline at end of file
diff --git a/src/regex/regex_test_lib.h b/src/regex/regex_test_lib.h
new file mode 100644
index 0000000..b21af2e
--- /dev/null
+++ b/src/regex/regex_test_lib.h
@@ -0,0 +1,80 @@
+/*
+ * This file is part of GNUnet
+ * (C) 2012 Christian Grothoff (and other contributing authors)
+ *
+ * GNUnet is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your
+ * option) any later version.
+ *
+ * GNUnet is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNUnet; see the file COPYING. If not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+/**
+ * @file src/regex/regex_test_lib.h
+ * @brief library to read regexes representing IP networks from a file
+ * and simplify them into one big regex, in order to run
+ * tests (regex performance, mesh profiler).
+ * @author Bartlomiej Polot
+ *
+ */
+
+#ifndef GNUNET_REGEX_TEST_LIB_H
+#define GNUNET_REGEX_TEST_LIB_H
+
+
+#ifdef __cplusplus
+extern "C"
+{
+ #if 0 /* keep Emacsens' auto-indent happy */
+}
+#endif
+#endif
+
+/**
+ * Combine an array of regexes into a single prefix-shared regex.
+ *
+ * @param regexes A NULL-terminated array of regexes.
+ *
+ * @return A string with a single regex that matches any of the original regexes
+ */
+char *
+GNUNET_REGEX_combine(char * const regexes[]);
+
+/**
+ * Read a set of regexes from a file, one per line and return them in an array
+ * suitable for GNUNET_REGEX_combine.
+ * The array must be free'd using GNUNET_REGEX_free_from_file.
+ *
+ * @param filename Name of the file containing the regexes.
+ *
+ * @return A newly allocated, NULL terminated array of regexes.
+ */
+char **
+GNUNET_REGEX_read_from_file (const char *filename);
+
+
+/**
+ * Free all memory reserved for a set of regexes created by read_from_file.
+ *
+ * @param regexes NULL-terminated array of regexes.
+ */
+void
+GNUNET_REGEX_free_from_file (char **regexes);
+
+#if 0 /* keep Emacsens' auto-indent happy */
+{
+ #endif
+ #ifdef __cplusplus
+}
+#endif
+
+/* end of regex_test_lib.h */
+#endif \ No newline at end of file
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c
index c63e97c..ce6f923 100644
--- a/src/regex/test_regex_eval_api.c
+++ b/src/regex/test_regex_eval_api.c
@@ -26,6 +26,7 @@
#include <time.h>
#include "platform.h"
#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
enum Match_Result
{
@@ -41,148 +42,155 @@ struct Regex_String_Pair
enum Match_Result expected_results[20];
};
-static const char allowed_literals[] =
- "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
+/**
+ * Random regex test. Generate a random regex as well as 'str_count' strings to
+ * match it against. Will match using GNUNET_REGEX implementation and compare
+ * the result to glibc regex result. 'rx_length' must not be larger than
+ * 'max_str_len'.
+ *
+ * @param rx_length length of the regular expression.
+ * @param max_str_len maximum length of the random strings.
+ * @param str_count number of generated random strings.
+ *
+ * @return 0 on success, non 0 otherwise.
+ */
int
test_random (unsigned int rx_length, unsigned int max_str_len,
unsigned int str_count)
{
- int i;
- int j;
- int rx_exp;
- char rand_rx[rx_length + 1];
- char matching_str[str_count][max_str_len + 1];
- char *rand_rxp;
- char *matching_strp;
- int char_op_switch;
- int last_was_op;
- char current_char;
+ unsigned int i;
+ char *rand_rx;
+ char *matching_str;
int eval;
int eval_check;
+ int eval_canonical;
+ int eval_canonical_check;
struct GNUNET_REGEX_Automaton *dfa;
regex_t rx;
regmatch_t matchptr[1];
char error[200];
int result;
- unsigned int str_len;
+ char *canonical_regex = NULL;
- // At least one string is needed for matching
+ /* At least one string is needed for matching */
GNUNET_assert (str_count > 0);
- // The string should be at least as long as the regex itself
+ /* The string should be at least as long as the regex itself */
GNUNET_assert (max_str_len >= rx_length);
- rand_rxp = rand_rx;
- matching_strp = matching_str[0];
- current_char = 0;
- last_was_op = 1;
+ /* Generate random regex and a string that matches the regex */
+ matching_str = GNUNET_malloc (rx_length + 1);
+ rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str);
- // Generate random regex and a string that matches the regex
- for (i = 0; i < rx_length; i++)
+ /* Now match */
+ result = 0;
+ for (i = 0; i < str_count; i++)
{
- char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
-
- if (0 == char_op_switch && !last_was_op)
+ if (0 < i)
{
- last_was_op = 1;
- rx_exp = rand () % 4;
-
- switch (rx_exp)
- {
- case 0:
- current_char = '+';
- break;
- case 1:
- current_char = '*';
- break;
- case 2:
- current_char = '?';
- break;
- case 3:
- if (i < rx_length - 1) // '|' cannot be at the end
- current_char = '|';
- else
- current_char =
- allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
- break;
- }
+ matching_str = GNUNET_REGEX_generate_random_string (max_str_len);
}
- else
+
+ /* Match string using DFA */
+ dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx), 0);
+ if (NULL == dfa)
{
- current_char =
- allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
- last_was_op = 0;
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
+ goto error;
}
- if (current_char != '+' && current_char != '*' && current_char != '?' &&
- current_char != '|')
+ eval = GNUNET_REGEX_eval (dfa, matching_str);
+ /* save the canonical regex for later comparison */
+ canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
+ GNUNET_REGEX_automaton_destroy (dfa);
+
+ /* Match string using glibc regex */
+ if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
{
- *matching_strp = current_char;
- matching_strp++;
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Could not compile regex using regcomp: %s\n", rand_rx);
+ goto error;
}
- *rand_rxp = current_char;
- rand_rxp++;
- }
- *rand_rxp = '\0';
- *matching_strp = '\0';
+ eval_check = regexec (&rx, matching_str, 1, matchptr, 0);
+ regfree (&rx);
- // Generate some random strings for matching...
- // Start at 1, because the first string is generated above during regex generation
- for (i = 1; i < str_count; i++)
- {
- str_len = rand () % max_str_len;
- for (j = 0; j < str_len; j++)
- matching_str[i][j] =
- allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
- matching_str[i][str_len] = '\0';
- }
+ /* We only want to match the whole string, because that's what our DFA does,
+ * too. */
+ if (eval_check == 0 &&
+ (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
+ eval_check = 1;
- // Now match
- result = 0;
- for (i = 0; i < str_count; i++)
- {
- // Match string using DFA
- dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
+ /* Match canonical regex */
+ dfa =
+ GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex),
+ 0);
if (NULL == dfa)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
- return -1;
+ goto error;
}
- eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
+ eval_canonical = GNUNET_REGEX_eval (dfa, matching_str);
GNUNET_REGEX_automaton_destroy (dfa);
- // Match string using glibc regex
- if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
+ if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED))
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Could not compile regex using regcomp\n");
- return -1;
+ "Could not compile regex using regcomp: %s\n",
+ canonical_regex);
+ goto error;
}
- eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
+ eval_canonical_check = regexec (&rx, matching_str, 1, matchptr, 0);
regfree (&rx);
- // We only want to match the whole string, because that's what our DFA does, too.
- if (eval_check == 0 &&
- (matchptr[0].rm_so != 0 ||
- matchptr[0].rm_eo != strlen (matching_str[i])))
- eval_check = 1;
+ /* We only want to match the whole string, because that's what our DFA does,
+ * too. */
+ if (eval_canonical_check == 0 &&
+ (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
+ eval_canonical_check = 1;
- // compare result
- if (eval_check != eval)
+ /* compare results */
+ if (eval_check != eval || eval_canonical != eval_canonical_check)
{
regerror (eval_check, &rx, error, sizeof error);
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Unexpected result:\nregex: %s\nstring: %s\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
- rand_rx, matching_str, eval, eval_check, error);
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Unexpected result:\nregex: %s\ncanonical_regex: %s\n\
+ string: %s\ngnunet regex: %i\nglibc regex: %i\n\
+ canonical regex: %i\ncanonical regex glibc: %i\n\
+ glibc error: %s\n\n", rand_rx, canonical_regex, matching_str,
+ eval, eval_check, eval_canonical, eval_canonical_check, error);
result += 1;
}
+ GNUNET_free (canonical_regex);
+ GNUNET_free (matching_str);
+ canonical_regex = NULL;
+ matching_str = NULL;
}
+
+ GNUNET_free (rand_rx);
+
return result;
+
+error:
+ GNUNET_free_non_null (matching_str);
+ GNUNET_free_non_null (rand_rx);
+ GNUNET_free_non_null (canonical_regex);
+ return -1;
}
+/**
+ * Automaton test that compares the result of matching regular expression 'rx'
+ * with the strings and expected results in 'rxstr' with the result of matching
+ * the same strings with glibc regex.
+ *
+ * @param a automaton.
+ * @param rx compiled glibc regex.
+ * @param rxstr regular expression and strings with expected results to
+ * match against.
+ *
+ * @return 0 on success, non 0 otherwise
+ */
int
test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
struct Regex_String_Pair *rxstr)
@@ -207,7 +215,8 @@ test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0);
- // We only want to match the whole string, because that's what our DFA does, too.
+ /* We only want to match the whole string, because that's what our DFA does,
+ * too. */
if (eval_check == 0 &&
(matchptr[0].rm_so != 0 ||
matchptr[0].rm_eo != strlen (rxstr->strings[i])))
@@ -220,11 +229,13 @@ test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
result = 1;
regerror (eval_check, rx, error, sizeof error);
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
- "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
- rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
- eval, eval_check, error, matchptr[0].rm_so,
- matchptr[0].rm_eo);
+ "Unexpected result:\nregex: %s\ncanonical_regex: %s\n"
+ "string: %s\nexpected result: %i\n"
+ "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\n"
+ "rm_so: %i\nrm_eo: %i\n\n", rxstr->regex,
+ GNUNET_REGEX_get_canonical_regex (a), rxstr->strings[i],
+ rxstr->expected_results[i], eval, eval_check, error,
+ matchptr[0].rm_so, matchptr[0].rm_eo);
}
}
return result;
@@ -233,13 +244,7 @@ test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
int
main (int argc, char *argv[])
{
- GNUNET_log_setup ("test-regex",
-#if VERBOSE
- "DEBUG",
-#else
- "WARNING",
-#endif
- NULL);
+ GNUNET_log_setup ("test-regex", "WARNING", NULL);
struct GNUNET_REGEX_Automaton *a;
regex_t rx;
@@ -247,17 +252,20 @@ main (int argc, char *argv[])
int check_nfa;
int check_dfa;
int check_rand;
+ char *check_proof;
- struct Regex_String_Pair rxstr[8] = {
+ struct Regex_String_Pair rxstr[19] = {
{"ab?(abcd)?", 5,
{"ababcd", "abab", "aabcd", "a", "abb"},
{match, nomatch, match, match, nomatch}},
{"ab(c|d)+c*(a(b|c)d)+", 5,
- {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ {"abcdcdcdcdddddabd", "abcd",
+ "abcddddddccccccccccccccccccccccccabdacdabd",
"abccccca", "abcdcdcdccdabdabd"},
{match, nomatch, match, nomatch, match}},
{"ab+c*(a(bx|c)d)+", 5,
- {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd",
+ {"abcdcdcdcdddddabd", "abcd",
+ "abcddddddccccccccccccccccccccccccabdacdabd",
"abccccca", "abcdcdcdccdabdabd"},
{nomatch, nomatch, nomatch, nomatch, nomatch}},
{"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1,
@@ -272,16 +280,53 @@ main (int argc, char *argv[])
{"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1,
{"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"},
{nomatch}},
+ {"(bla)*", 8,
+ {"", "bla", "blabla", "bl", "la", "b", "l", "a"},
+ {match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
+ {"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8,
+ {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b",
+ "l",
+ "a"},
+ {nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}},
+ {"a|aa*a", 6,
+ {"", "a", "aa", "aaa", "aaaa", "aaaaa"},
+ {nomatch, match, match, match, match, match}},
+ {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 1,
+ {"abcabdblaacdbla"},
+ {nomatch}},
+ {"(ac|b)+", 8,
+ {"b", "bb", "ac", "", "acb", "bacbacac", "acacac", "abc"},
+ {match, match, match, nomatch, match, match, match, nomatch}},
+ {"(ab|c)+", 7,
+ {"", "ab", "c", "abc", "ababcc", "acc", "abac"},
+ {nomatch, match, match, match, match, nomatch, nomatch}},
+ {"((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1,
+ {"", "2j2jADK", "j2jADK"},
+ {nomatch, match, match}},
+ {"((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2,
+ {"", "2j2jjADK", "j2jADK"},
+ {nomatch, match, match}},
{"ab(c|d)+c*(a(b|c)d)+", 1,
{"abacd"},
- {nomatch}}
+ {nomatch}},
+ {"d|5kl", 1,
+ {"d5kl"},
+ {nomatch}},
+ {"a()b", 1,
+ {"ab"},
+ {match}},
+ {"GNVPN-0001-PAD(001110101001001010(0|1)*|001110101001001010000(0|1)*|001110101001001010001(0|1)*|001110101001001010010(0|1)*|001110101001001010011(0|1)*|001110101001001010100(0|1)*|001110101001001010101(0|1)*|001110101001001010110(0|1)*|001110101001001010111(0|1)*|0011101010110110(0|1)*|001110101011011000000(0|1)*|001110101011011000001(0|1)*|001110101011011000010(0|1)*|001110101011011000011(0|1)*|001110101011011000100(0|1)*|001110101011011000101(0|1)*|001110101011011000110(0|1)*|001110101011011000111(0|1)*|001110101011011001000(0|1)*|001110101011011001001(0|1)*|001110101011011001010(0|1)*|001110101011011001011(0|1)*|001110101011011001100(0|1)*|001110101011011001101(0|1)*|001110101011011001110(0|1)*|001110101011011001111(0|1)*|001110101011011010000(0|1)*|001110101011011010001(0|1)*|001110101011011010010(0|1)*|001110101011011010011(0|1)*|001110101011011010100(0|1)*|001110101011011010101(0|1)*|001110101011011010110(0|1)*|001110101011011010111(0|1)*|001110101011011011000(0|1)*|001110101011011011001(0|1)*|001110101011011011010(0|1)*|001110101011011011011(0|1)*|001110101011011011100(0|1)*|001110101011011011101(0|1)*|001110101011011011110(0|1)*|001110101011011011111(0|1)*|0011101110111101(0|1)*|001110111011110100000(0|1)*|001110111011110100001(0|1)*|001110111011110100010(0|1)*|001110111011110100011(0|1)*|001110111011110100100(0|1)*|001110111011110100101(0|1)*|001110111011110100110(0|1)*|001110111011110100111(0|1)*|001110111011110101000(0|1)*|001110111011110101001(0|1)*|001110111011110101010(0|1)*|001110111011110101011(0|1)*|001110111011110101100(0|1)*|001110111011110101101(0|1)*|001110111011110101110(0|1)*|001110111011110101111(0|1)*|001110111011110110000(0|1)*|001110111011110110001(0|1)*|001110111011110110010(0|1)*|001110111011110110011(0|1)*|001110111011110110100(0|1)*|001110111011110110101(0|1)*|001110111011110110110(0|1)*|001110111011110110111(0|1)*|001110111011110111000(0|1)*|001110111011110111001(0|1)*|001110111011110111010(0|1)*|001110111011110111011(0|1)*|001110
111011110111100(0|1)*|001110111011110111101(0|1)*|001110111011110111110(0|1)*|0111010001010110(0|1)*|011101000101011000000(0|1)*|011101000101011000001(0|1)*|011101000101011000010(0|1)*|011101000101011000011(0|1)*|011101000101011000100(0|1)*|011101000101011000101(0|1)*|011101000101011000110(0|1)*|011101000101011000111(0|1)*|011101000101011001000(0|1)*|011101000101011001001(0|1)*|011101000101011001010(0|1)*|011101000101011001011(0|1)*|011101000101011001100(0|1)*|011101000101011001101(0|1)*|011101000101011001110(0|1)*|011101000101011001111(0|1)*|011101000101011010000(0|1)*|011101000101011010001(0|1)*|011101000101011010010(0|1)*|011101000101011010011(0|1)*|011101000101011010100(0|1)*|011101000101011010101(0|1)*|011101000101011010110(0|1)*|011101000101011010111(0|1)*|011101000101011011000(0|1)*|011101000101011011001(0|1)*|011101000101011011010(0|1)*|011101000101011011011(0|1)*|011101000101011011100(0|1)*|011101000101011011101(0|1)*|011101000101011011110(0|1)*|011101000101011011111(0|1)*|0111010001010111(0|1)*|011101000101011100000(0|1)*|011101000101011100001(0|1)*|011101000101011100010(0|1)*|011101000101011100011(0|1)*|011101000101011100100(0|1)*|011101000101011100101(0|1)*|011101000101011100110(0|1)*|011101000101011100111(0|1)*|011101000101011101000(0|1)*|011101000101011101001(0|1)*|011101000101011101010(0|1)*|011101000101011101011(0|1)*|011101000101011101100(0|1)*|011101000101011101101(0|1)*|011101000101011101110(0|1)*|011101000101011101111(0|1)*|011101000101011110000(0|1)*|011101000101011110001(0|1)*|011101000101011110010(0|1)*|011101000101011110011(0|1)*|011101000101011110100(0|1)*|011101000101011110101(0|1)*|011101000101011110110(0|1)*|011101000101011110111(0|1)*|011101000101011111000(0|1)*|011101000101011111001(0|1)*|011101000101011111010(0|1)*|011101000101011111011(0|1)*|011101000101011111100(0|1)*|011101000101011111101(0|1)*|011101000101011111110(0|1)*|011101000101011111111(0|1)*|0111010001011000(0|1)*|011101000101100000000(0|1)*|011101000101100000001(0|1)*|01110
1000101100000010(0|1)*|011101000101100000011(0|1)*|011101000101100000100(0|1)*|011101000101100000101(0|1)*|011101000101100000110(0|1)*|011101000101100000111(0|1)*|011101000101100001000(0|1)*|011101000101100001001(0|1)*|011101000101100001010(0|1)*|011101000101100001011(0|1)*|011101000101100001100(0|1)*|011101000101100001101(0|1)*|011101000101100001110(0|1)*|011101000101100001111(0|1)*|011101000101100010000(0|1)*|011101000101100010001(0|1)*|011101000101100010010(0|1)*|011101000101100010011(0|1)*|011101000101100010100(0|1)*|011101000101100010101(0|1)*|011101000101100010110(0|1)*|011101000101100010111(0|1)*|011101000101100011000(0|1)*|011101000101100011001(0|1)*|011101000101100011010(0|1)*|011101000101100011011(0|1)*|011101000101100011100(0|1)*|011101000101100011101(0|1)*|011101000101100011110(0|1)*|011101000101100011111(0|1)*|01110100010110010(0|1)*|011101000101100100000(0|1)*|011101000101100100001(0|1)*|011101000101100100010(0|1)*|011101000101100100011(0|1)*|011101000101100100100(0|1)*|011101000101100100101(0|1)*|011101000101100100110(0|1)*|011101000101100100111(0|1)*|011101000101100101000(0|1)*|011101000101100101001(0|1)*|011101000101100101010(0|1)*|011101000101100101011(0|1)*|011101000101100101100(0|1)*|011101000101100101101(0|1)*|011101000101100101110(0|1)*|011101000101100101111(0|1)*|011101000101100101111000(0|1)*|1100101010011100(0|1)*|110010101001110000000(0|1)*|110010101001110000000001(0|1)*|110010101001110000000010(0|1)*|110010101001110000000110(0|1)*|110010101001110000001(0|1)*|110010101001110000001000(0|1)*|110010101001110000001001(0|1)*|110010101001110000001010(0|1)*|110010101001110000001011(0|1)*|110010101001110000001101(0|1)*|110010101001110000001110(0|1)*|110010101001110000010(0|1)*|110010101001110000011(0|1)*|110010101001110000100(0|1)*|110010101001110000101(0|1)*|110010101001110000110(0|1)*|110010101001110000111(0|1)*|110010101001110001000(0|1)*|110010101001110001001(0|1)*|110010101001110001010(0|1)*|110010101001110001011(0|1)*|110010101001110001100(0|
1)*|110010101001110001101(0|1)*|110010101001110001110(0|1)*|110010101001110001111(0|1)*|110010101001110010000(0|1)*|110010101001110010001(0|1)*|110010101001110010010(0|1)*|110010101001110010011(0|1)*|110010101001110010100(0|1)*|110010101001110010101(0|1)*|110010101001110010110(0|1)*|110010101001110010111(0|1)*|110010101001110011000(0|1)*|110010101001110011001(0|1)*|110010101001110011010(0|1)*|110010101001110011011(0|1)*|110010101001110011100(0|1)*|110010101001110011101(0|1)*|110010101001110011110(0|1)*|110010101001110011111(0|1)*|1101101010111010(0|1)*|110110101011101000000(0|1)*|110110101011101000000001(0|1)*|110110101011101000001000(0|1)*|110110101011101000001001(0|1)*|110110101011101000001010(0|1)*|110110101011101000001011(0|1)*|110110101011101000001100(0|1)*|110110101011101000001110(0|1)*|110110101011101000001111(0|1)*|110110101011101000010(0|1)*|110110101011101000010000(0|1)*|110110101011101000010001(0|1)*|110110101011101000010010(0|1)*|110110101011101000010011(0|1)*|110110101011101000011(0|1)*|110110101011101000100(0|1)*|110110101011101000101(0|1)*|110110101011101000110(0|1)*|110110101011101000111(0|1)*|110110101011101001000(0|1)*|110110101011101001001(0|1)*|110110101011101001010(0|1)*|110110101011101001011(0|1)*|110110101011101001100(0|1)*|110110101011101001101(0|1)*|110110101011101001110(0|1)*|110110101011101001111(0|1)*|110110101011101010000(0|1)*|110110101011101010001(0|1)*|110110101011101010010(0|1)*|110110101011101010011(0|1)*|110110101011101010100(0|1)*|110110101011101010101(0|1)*|110110101011101010110(0|1)*|110110101011101010111(0|1)*|110110101011101011000(0|1)*|110110101011101011001(0|1)*|110110101011101011010(0|1)*|110110101011101011011(0|1)*|110110101011101011100(0|1)*|110110101011101011101(0|1)*|110110101011101011110(0|1)*|110110101011101011111(0|1)*|1101101011010100(0|1)*|110110101101010000000(0|1)*|110110101101010000001(0|1)*|110110101101010000010(0|1)*|110110101101010000011(0|1)*|110110101101010000100(0|1)*|110110101101010000101(0|1)*|1101101011
01010000110(0|1)*|110110101101010000111(0|1)*|110110101101010001000(0|1)*|110110101101010001001(0|1)*|110110101101010001010(0|1)*|110110101101010001011(0|1)*|110110101101010001100(0|1)*|110110101101010001101(0|1)*|110110101101010001110(0|1)*|110110101101010001111(0|1)*|110110101101010010000(0|1)*|110110101101010010001(0|1)*|110110101101010010010(0|1)*|110110101101010010011(0|1)*|110110101101010010100(0|1)*|1101101011010100101000(0|1)*|110110101101010010101(0|1)*|110110101101010010110(0|1)*|110110101101010010111(0|1)*|110110101101010011000(0|1)*|110110101101010011010(0|1)*|110110101101010011011(0|1)*|110110101101010011100(0|1)*|110110101101010011101(0|1)*|110110101101010011110(0|1)*|110110101101010011111(0|1)*|1101111010100100(0|1)*|110111101010010000000(0|1)*|110111101010010000001(0|1)*|110111101010010000010(0|1)*|110111101010010000011(0|1)*|110111101010010000100(0|1)*|110111101010010000101(0|1)*|110111101010010000110(0|1)*|110111101010010000111(0|1)*|110111101010010001000(0|1)*|110111101010010001001(0|1)*|110111101010010001010(0|1)*|110111101010010001011(0|1)*|110111101010010001100(0|1)*|110111101010010001101(0|1)*|110111101010010001110(0|1)*|110111101010010001111(0|1)*|110111101010010010000(0|1)*|110111101010010010001(0|1)*|110111101010010010010(0|1)*|110111101010010010011(0|1)*|110111101010010010100(0|1)*|110111101010010010101(0|1)*|110111101010010010110(0|1)*|110111101010010010111(0|1)*|110111101010010011000(0|1)*|110111101010010011001(0|1)*|110111101010010011010(0|1)*|110111101010010011011(0|1)*|110111101010010011100(0|1)*|110111101010010011101(0|1)*|110111101010010011110(0|1)*|110111101010010011111(0|1)*|11011110101001010(0|1)*|110111101010010100000(0|1)*|110111101010010100001(0|1)*|110111101010010100010(0|1)*|110111101010010100011(0|1)*|110111101010010100100(0|1)*|110111101010010100101(0|1)*|110111101010010100110(0|1)*|110111101010010100111(0|1)*|110111101010010101000(0|1)*|110111101010010101001(0|1)*|110111101010010101010(0|1)*|110111101010010101011(0|1)*|11
0111101010010101100(0|1)*|110111101010010101101(0|1)*|110111101010010101110(0|1)*|110111101010010101111(0|1)*)",
+ 2,
+ {"GNVPN-0001-PAD1101111010100101011101010101010101",
+ "GNVPN-0001-PAD11001010100111000101101010101"},
+ {match, match}}
};
check_nfa = 0;
check_dfa = 0;
check_rand = 0;
- for (i = 0; i < 8; i++)
+ for (i = 0; i < 19; i++)
{
if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
{
@@ -290,22 +335,31 @@ main (int argc, char *argv[])
return 1;
}
- // NFA test
+ /* NFA test */
a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex));
check_nfa += test_automaton (a, &rx, &rxstr[i]);
GNUNET_REGEX_automaton_destroy (a);
- // DFA test
- a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
+ /* DFA test */
+ a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0);
+ check_dfa += test_automaton (a, &rx, &rxstr[i]);
+ check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a));
+ GNUNET_REGEX_automaton_destroy (a);
+
+ a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof), 0);
check_dfa += test_automaton (a, &rx, &rxstr[i]);
GNUNET_REGEX_automaton_destroy (a);
+ if (0 != check_dfa)
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof);
+ GNUNET_free_non_null (check_proof);
regfree (&rx);
}
+ /* Random tests */
srand (time (NULL));
- for (i = 0; i < 150; i++)
- check_rand += test_random (150, 200, 25);
+ for (i = 0; i < 20; i++)
+ check_rand += test_random (50, 60, 10);
return check_nfa + check_dfa + check_rand;
}
diff --git a/src/regex/test_regex_graph_api.c b/src/regex/test_regex_graph_api.c
new file mode 100644
index 0000000..3ae6073
--- /dev/null
+++ b/src/regex/test_regex_graph_api.c
@@ -0,0 +1,157 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file regex/test_regex_graph_api.c
+ * @brief test for regex_graph.c
+ * @author Maximilian Szengel
+ */
+#include <regex.h>
+#include <time.h>
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
+
+#define KEEP_FILES 1
+
+/**
+ * Check if 'filename' exists and is not empty.
+ *
+ * The file is opened for reading, its size is probed by seeking to the end
+ * and calling ftell(), and it is closed again. It is only unlinked
+ * afterwards when KEEP_FILES evaluates to 0; a failing unlink is logged but
+ * does not change the return value.
+ *
+ * @param filename name of the file that should be checked
+ *
+ * @return 0 if ok, non 0 on error: 1 if the file could not be opened,
+ * 2 if ftell() reports a position below 1 (empty file or ftell failure).
+ */
+static int
+filecheck (const char *filename)
+{
+ int error = 0;
+ FILE *fp;
+
+ /* Check if file was created; it is deleted again further down, but only
+ * when KEEP_FILES is 0 */
+ if (NULL == (fp = fopen (filename, "r")))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not find graph %s\n", filename);
+ return 1;
+ }
+
+ GNUNET_break (0 == fseek (fp, 0L, SEEK_END));
+ if (1 > ftell (fp)) /* position at end of file, i.e. the file size */
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Graph writing failed, got empty file (%s)!\n", filename);
+ error = 2;
+ }
+
+ GNUNET_assert (0 == fclose (fp));
+
+ if (!KEEP_FILES)
+ {
+ if (0 != unlink (filename))
+ GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_ERROR, "unlink", filename);
+ }
+ return error;
+}
+
+/**
+ * Test entry point: for each of the 12 test regexes, build NFAs and DFAs,
+ * save each one as a graph to "test_graph.dot" and verify the written file
+ * with filecheck().
+ *
+ * Per regex, four NFA graphs are written (DEFAULT, DEFAULT|VERBOSE,
+ * DEFAULT|COLORING, DEFAULT|VERBOSE|COLORING), followed by three DFA graphs
+ * (DEFAULT, DEFAULT|VERBOSE, DEFAULT|COLORING) and one more DFA graph whose
+ * automaton is constructed with a third argument of 4 instead of 0.
+ * NOTE(review): the meaning of that third argument is not visible here;
+ * confirm against GNUNET_REGEX_construct_dfa.
+ *
+ * @param argc unused
+ * @param argv unused
+ *
+ * @return sum of all filecheck() results, 0 if every graph file was written
+ */
+int
+main (int argc, char *argv[])
+{
+ int error;
+ struct GNUNET_REGEX_Automaton *a;
+ unsigned int i;
+ const char *filename = "test_graph.dot";
+
+ const char *regex[12] = {
+ "ab(c|d)+c*(a(b|c)+d)+(bla)+",
+ "(bla)*",
+ "b(lab)*la",
+ "(ab)*",
+ "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*",
+ "z(abc|def)?xyz",
+ "1*0(0|1)*",
+ "a*b*",
+ "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*",
+ "a",
+ "a|b",
+ "PADPADPADPADPADPabcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd"
+ };
+
+ GNUNET_log_setup ("test-regex", "WARNING", NULL);
+ error = 0;
+ for (i = 0; i < 12; i++)
+ {
+ /* Check NFA graph creation */
+ a = GNUNET_REGEX_construct_nfa (regex[i], strlen (regex[i]));
+ GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+ a = GNUNET_REGEX_construct_nfa (regex[i], strlen (regex[i]));
+ GNUNET_REGEX_automaton_save_graph (a, filename,
+ GNUNET_REGEX_GRAPH_DEFAULT |
+ GNUNET_REGEX_GRAPH_VERBOSE);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+ a = GNUNET_REGEX_construct_nfa (regex[i], strlen (regex[i]));
+ GNUNET_REGEX_automaton_save_graph (a, filename,
+ GNUNET_REGEX_GRAPH_DEFAULT |
+ GNUNET_REGEX_GRAPH_COLORING);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+ a = GNUNET_REGEX_construct_nfa (regex[i], strlen (regex[i]));
+ GNUNET_REGEX_automaton_save_graph (a, filename,
+ GNUNET_REGEX_GRAPH_DEFAULT |
+ GNUNET_REGEX_GRAPH_VERBOSE |
+ GNUNET_REGEX_GRAPH_COLORING);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+
+ /* Check DFA graph creation */
+ a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
+ GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+ a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
+ GNUNET_REGEX_automaton_save_graph (a, filename,
+ GNUNET_REGEX_GRAPH_DEFAULT |
+ GNUNET_REGEX_GRAPH_VERBOSE);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+ a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
+ GNUNET_REGEX_automaton_save_graph (a, filename,
+ GNUNET_REGEX_GRAPH_DEFAULT |
+ GNUNET_REGEX_GRAPH_COLORING);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+
+ a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 4);
+ GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT);
+ GNUNET_REGEX_automaton_destroy (a);
+ error += filecheck (filename);
+
+ }
+
+ return error;
+}
diff --git a/src/regex/test_regex_iptoregex.c b/src/regex/test_regex_iptoregex.c
new file mode 100644
index 0000000..e33e792
--- /dev/null
+++ b/src/regex/test_regex_iptoregex.c
@@ -0,0 +1,103 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file regex/test_regex_iptoregex.c
+ * @brief simple test for regex.c iptoregex functions
+ * @author Maximilian Szengel
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+
+/**
+ * Convert one IPv4 and one IPv6 address to their regex representation and
+ * compare each result with the expected string.
+ *
+ * Both addresses are parsed with inet_pton(); a parse failure trips
+ * GNUNET_assert.
+ *
+ * @param ipv4 IPv4 address in text form
+ * @param netmask netmask in text form, passed to GNUNET_REGEX_ipv4toregex
+ * @param expectedv4 regex expected for the IPv4 address
+ * @param ipv6 IPv6 address in text form
+ * @param prefixlen prefix length passed to GNUNET_REGEX_ipv6toregex
+ * @param expectedv6 regex expected for the IPv6 address
+ *
+ * @return number of mismatches (0, 1 or 2)
+ */
+static int
+test_iptoregex (const char *ipv4, const char *netmask, const char *expectedv4,
+ const char *ipv6, unsigned int prefixlen,
+ const char *expectedv6)
+{
+ int error = 0;
+
+ struct in_addr a;
+ struct in6_addr b;
+ char rxv4[GNUNET_REGEX_IPV4_REGEXLEN];
+ char rxv6[GNUNET_REGEX_IPV6_REGEXLEN];
+
+ GNUNET_assert (1 == inet_pton (AF_INET, ipv4, &a));
+ GNUNET_REGEX_ipv4toregex (&a, netmask, rxv4);
+
+
+ if (0 != strcmp (rxv4, expectedv4))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Expected: %s but got: %s\n",
+ expectedv4, rxv4);
+ error++;
+ }
+
+ GNUNET_assert (1 == inet_pton (AF_INET6, ipv6, &b));
+ GNUNET_REGEX_ipv6toregex (&b, prefixlen, rxv6);
+
+ if (0 != strcmp (rxv6, expectedv6))
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Expected: %s but got: %s\n",
+ expectedv6, rxv6);
+ error++;
+ }
+
+ return error;
+}
+/**
+ * Test entry point: runs four IPv4/IPv6 address and netmask/prefix
+ * combinations through test_iptoregex().
+ *
+ * @param argc unused
+ * @param argv unused
+ *
+ * @return total number of mismatches reported by test_iptoregex(), 0 on
+ * success
+ */
+int
+main (int argc, char *argv[])
+{
+ GNUNET_log_setup ("test-regex", "WARNING", NULL);
+
+ int error;
+
+ error = 0;
+
+ error +=
+ test_iptoregex ("192.0.0.0", "255.255.255.0",
+ "110000000000000000000000(0|1)+", "FFFF::0", 16,
+ "1111111111111111(0|1)+");
+
+ error +=
+ test_iptoregex ("187.238.225.0", "255.255.255.128",
+ "1011101111101110111000010(0|1)+", "E1E1:73F9:51BE::0",
+ 49,
+ "1110000111100001011100111111100101010001101111100(0|1)+");
+
+ error +=
+ test_iptoregex ("255.255.255.255", "255.255.255.255",
+ "11111111111111111111111111111111",
+ "FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", 128,
+ "11111111111111111111111111111111"
+ "11111111111111111111111111111111"
+ "11111111111111111111111111111111"
+ "11111111111111111111111111111111");
+
+ error +=
+ test_iptoregex ("0.0.0.0", "255.255.255.255",
+ "00000000000000000000000000000000", "0::0", 128,
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000");
+
+ return error;
+}
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c
index b214d6a..695bc30 100644
--- a/src/regex/test_regex_iterate_api.c
+++ b/src/regex/test_regex_iterate_api.c
@@ -26,46 +26,233 @@
#include <time.h>
#include "platform.h"
#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
-void
-key_iterator (void *cls, const GNUNET_HashCode * key, const char *proof,
+/**
+ * Regex initial padding.
+ */
+#define INITIAL_PADDING "PADPADPADPADPADP"
+
+/**
+ * Set to GNUNET_YES to save a debug graph.
+ */
+#define GNUNET_REGEX_ITERATE_SAVE_DEBUG_GRAPH GNUNET_NO
+/**
+ * Number of edges seen by key_iterator(); main() resets it to 0 before
+ * iterating over a DFA in its first loop.
+ */
+static unsigned int transition_counter;
+/**
+ * State shared between main() and key_iterator() through the iterator
+ * closure argument.
+ */
+struct IteratorContext
+{
+ int error; /**< incremented for every failed GNUNET_REGEX_check_proof call */
+ int should_save_graph; /**< GNUNET_YES to write dot output to graph_filep */
+ FILE *graph_filep; /**< dot output file; NULL while no graph is being saved */
+ unsigned int string_count; /**< number of entries in 'strings' */
+ char *const *strings; /**< proof strings to look for during iteration */
+ unsigned int match_count; /**< number of iterated proofs equal to an entry of 'strings' */
+};
+/**
+ * A regex together with the proof strings that are expected to show up
+ * while iterating over the states of its DFA.
+ */
+struct RegexStringPair
+{
+ char *regex; /**< regular expression the DFA is built from */
+ unsigned int string_count; /**< number of used entries in 'strings' */
+ char *strings[20]; /**< expected proof strings, at most 20 */
+};
+
+/**
+ * Iterator callback passed to GNUNET_REGEX_iterate_all_edges.
+ *
+ * Adds num_edges to the global transition_counter, optionally appends the
+ * state and its edges in dot syntax to ctx->graph_filep, increments
+ * ctx->match_count for every entry of ctx->strings equal to 'proof', and
+ * increments ctx->error if GNUNET_REGEX_check_proof rejects (proof, key).
+ *
+ * @param cls closure, a 'struct IteratorContext *'
+ * @param key hash used as identifier of the state
+ * @param proof proof string belonging to 'key', must not be NULL
+ * @param accepting GNUNET_YES if the state is drawn as accepting state
+ * @param num_edges number of entries in 'edges'
+ * @param edges outgoing edges (label and destination) of the state
+ */
+static void
+key_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof,
int accepting, unsigned int num_edges,
const struct GNUNET_REGEX_Edge *edges)
{
- int i;
+ unsigned int i;
+ struct IteratorContext *ctx = cls;
+ char *out_str;
+ char *state_id = GNUNET_strdup (GNUNET_h2s (key)); /* own copy: GNUNET_h2s is called again for edge destinations below */
+
+ GNUNET_assert (NULL != proof);
+ if (GNUNET_YES == ctx->should_save_graph)
+ {
+ if (GNUNET_YES == accepting)
+ GNUNET_asprintf (&out_str, "\"%s\" [shape=doublecircle]\n", state_id);
+ else
+ GNUNET_asprintf (&out_str, "\"%s\" [shape=circle]\n", state_id);
+ fwrite (out_str, strlen (out_str), 1, ctx->graph_filep);
+ GNUNET_free (out_str);
+
+ for (i = 0; i < num_edges; i++)
+ {
+ transition_counter++;
+ GNUNET_asprintf (&out_str, "\"%s\" -> \"%s\" [label = \"%s (%s)\"]\n",
+ state_id, GNUNET_h2s (&edges[i].destination),
+ edges[i].label, proof);
+ fwrite (out_str, strlen (out_str), 1, ctx->graph_filep);
+
+ GNUNET_free (out_str);
+ }
+ }
+ else
+ {
+ for (i = 0; i < num_edges; i++) /* only count the edges, nothing is written */
+ transition_counter++;
+ }
+
+ for (i = 0; i < ctx->string_count; i++)
+ {
+ if (0 == strcmp (proof, ctx->strings[i]))
+ ctx->match_count++;
+ }
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n");
- for (i = 0; i < num_edges; i++)
+ if (GNUNET_OK != GNUNET_REGEX_check_proof (proof, key))
{
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label);
+ ctx->error++;
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Proof check failed: proof: %s key: %s\n", proof, state_id);
}
- if (NULL != proof)
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Proof: %s\n", proof);
+ GNUNET_free (state_id);
}
int
main (int argc, char *argv[])
{
- GNUNET_log_setup ("test-regex",
-#if VERBOSE
- "DEBUG",
-#else
- "WARNING",
-#endif
- NULL);
+ GNUNET_log_setup ("test-regex", "WARNING", NULL);
int error;
- const char *regex;
struct GNUNET_REGEX_Automaton *dfa;
+ unsigned int i;
+ unsigned int num_transitions;
+ char *filename = NULL;
+ struct IteratorContext ctx = { 0, 0, NULL, 0, NULL, 0 };
error = 0;
- regex = "ab(c|d)+c*(a(b|c)d)+";
- dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex));
- GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot");
- GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL);
- GNUNET_REGEX_automaton_destroy (dfa);
+ const struct RegexStringPair rxstr[13] = {
+ {INITIAL_PADDING "ab(c|d)+c*(a(b|c)+d)+(bla)+", 2,
+ {INITIAL_PADDING "abcdcdca", INITIAL_PADDING "abcabdbl"}},
+ {INITIAL_PADDING
+ "abcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd", 1,
+ {INITIAL_PADDING "abcdefgh"}},
+ {INITIAL_PADDING "VPN-4-1(0|1)*", 2,
+ {INITIAL_PADDING "VPN-4-10", INITIAL_PADDING "VPN-4-11"}},
+ {INITIAL_PADDING "(a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*)", 2,
+ {INITIAL_PADDING "aaaaaaaa", INITIAL_PADDING "aaXXyyyc"}},
+ {INITIAL_PADDING "a*", 1, {INITIAL_PADDING "aaaaaaaa"}},
+ {INITIAL_PADDING "xzxzxzxzxz", 1, {INITIAL_PADDING "xzxzxzxz"}},
+ {INITIAL_PADDING "xyz*", 1, {INITIAL_PADDING "xyzzzzzz"}},
+ {INITIAL_PADDING
+ "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)",
+ 2, {INITIAL_PADDING "abcd:000", INITIAL_PADDING "abcd:101"}},
+ {INITIAL_PADDING "(x*|(0|1|2)(a|b|c|d)+)", 2,
+ {INITIAL_PADDING "xxxxxxxx", INITIAL_PADDING "0abcdbad"}},
+ {INITIAL_PADDING "(0|1)(0|1)23456789ABC", 1, {INITIAL_PADDING "11234567"}},
+ {INITIAL_PADDING "0*123456789ABC*", 3,
+ {INITIAL_PADDING "00123456", INITIAL_PADDING "00000000",
+ INITIAL_PADDING "12345678"}},
+ {INITIAL_PADDING "0123456789A*BC", 1, {INITIAL_PADDING "01234567"}},
+ {"GNUNETVPN000100000IPEX6-fc5a:4e1:c2ba::1", 1, {"GNUNETVPN000100000IPEX6-"}}
+ };
+
+ const char *graph_start_str = "digraph G {\nrankdir=LR\n";
+ const char *graph_end_str = "\n}\n";
+
+ for (i = 0; i < 13; i++)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating DFA for regex %s\n",
+ rxstr[i].regex);
+
+
+ /* Create graph */
+ if (GNUNET_YES == GNUNET_REGEX_ITERATE_SAVE_DEBUG_GRAPH)
+ {
+ GNUNET_asprintf (&filename, "iteration_graph_%u.dot", i);
+ ctx.graph_filep = fopen (filename, "w");
+ if (NULL == ctx.graph_filep)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+ "Could not open file %s for saving iteration graph.\n",
+ filename);
+ ctx.should_save_graph = GNUNET_NO;
+ }
+ else
+ {
+ ctx.should_save_graph = GNUNET_YES;
+ fwrite (graph_start_str, strlen (graph_start_str), 1, ctx.graph_filep);
+ }
+ GNUNET_free (filename);
+ }
+ else
+ {
+ ctx.should_save_graph = GNUNET_NO;
+ ctx.graph_filep = NULL;
+ }
+
+ /* Iterate over DFA edges */
+ transition_counter = 0;
+ ctx.string_count = rxstr[i].string_count;
+ ctx.strings = rxstr[i].strings;
+ ctx.match_count = 0;
+ dfa =
+ GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0);
+ GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx);
+ num_transitions =
+ GNUNET_REGEX_get_transition_count (dfa) - dfa->start->transition_count;
+
+ if (transition_counter < num_transitions)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Automaton has %d transitions, iterated over %d transitions\n",
+ num_transitions, transition_counter);
+ error += 1;
+ }
+
+ if (ctx.match_count < ctx.string_count)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Missing initial states for regex %s\n", rxstr[i].regex);
+ error += (ctx.string_count - ctx.match_count);
+ }
+ else if (ctx.match_count > ctx.string_count)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Duplicate initial transitions for regex %s\n",
+ rxstr[i].regex);
+ error += (ctx.string_count - ctx.match_count);
+ }
+
+ GNUNET_REGEX_automaton_destroy (dfa);
+
+ /* Finish graph */
+ if (GNUNET_YES == ctx.should_save_graph)
+ {
+ fwrite (graph_end_str, strlen (graph_end_str), 1, ctx.graph_filep);
+ fclose (ctx.graph_filep);
+ ctx.graph_filep = NULL;
+ ctx.should_save_graph = GNUNET_NO;
+ }
+ }
+
+
+ for (i = 0; i < 13; i++)
+ {
+ ctx.string_count = rxstr[i].string_count;
+ ctx.strings = rxstr[i].strings;
+ ctx.match_count = 0;
+
+ dfa =
+ GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0);
+ GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2);
+ GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx);
+
+ if (ctx.match_count < ctx.string_count)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Missing initial states for regex %s\n", rxstr[i].regex);
+ error += (ctx.string_count - ctx.match_count);
+ }
+
+ GNUNET_REGEX_automaton_destroy (dfa);
+ }
+
+ error += ctx.error;
return error;
}
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c
new file mode 100644
index 0000000..92a3a41
--- /dev/null
+++ b/src/regex/test_regex_proofs.c
@@ -0,0 +1,171 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file regex/test_regex_proofs.c
+ * @brief test for regex.c
+ * @author Maximilian Szengel
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
+
+
+/**
+ * Check that computing the canonical regex is stable for the given regex:
+ * 1. construct a dfa A from the given 'regex',
+ * 2. get the canonical regex of dfa A,
+ * 3. construct a dfa B from this canonical regex,
+ * 4. compare the canonical regex of dfa A with the canonical regex of dfa B.
+ *
+ * @param regex regular expression used for this test (see above).
+ *
+ * @return 0 if both canonical regexes are equal, 1 otherwise
+ */
+static unsigned int
+test_proof (const char *regex)
+{
+  struct GNUNET_REGEX_Automaton *automaton;
+  char *first_canon;
+  const char *second_canon;
+  unsigned int mismatch;
+
+  /* Pass 1: canonical regex of the input. It is duplicated with
+   * GNUNET_strdup before the automaton it was obtained from is destroyed. */
+  automaton = GNUNET_REGEX_construct_dfa (regex, strlen (regex), 1);
+  GNUNET_assert (NULL != automaton);
+  first_canon = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (automaton));
+  GNUNET_REGEX_automaton_destroy (automaton);
+
+  /* Pass 2: canonical regex of the canonical regex from pass 1. */
+  automaton = GNUNET_REGEX_construct_dfa (first_canon, strlen (first_canon), 1);
+  GNUNET_assert (NULL != automaton);
+  second_canon = GNUNET_REGEX_get_canonical_regex (automaton);
+
+  mismatch = (0 != strcmp (first_canon, second_canon)) ? 1 : 0;
+  if (0 != mismatch)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n",
+                regex, first_canon, second_canon);
+  }
+
+  /* second_canon is not used past this point; release both resources. */
+  GNUNET_free (first_canon);
+  GNUNET_REGEX_automaton_destroy (automaton);
+
+  return mismatch;
+}
+
+
+/**
+ * Run 'test_proof' on 'count' randomly generated regular expressions, each
+ * generated with length 'rx_length'.
+ *
+ * @param count number of random regular expressions to test.
+ * @param rx_length length handed to the random regex generator.
+ *
+ * @return 0 on success, number of failures otherwise.
+ */
+static unsigned int
+test_proofs_random (unsigned int count, size_t rx_length)
+{
+  unsigned int failed = 0;
+  unsigned int remaining;
+
+  for (remaining = count; remaining > 0; remaining--)
+  {
+    /* Each generated expression is freed right after it has been tested. */
+    char *candidate = GNUNET_REGEX_generate_random_regex (rx_length, NULL);
+
+    failed += test_proof (candidate);
+    GNUNET_free (candidate);
+  }
+
+  return failed;
+}
+
+
+/**
+ * Test a number of known examples of regexes for proper canonicalization.
+ * The examples are stored as consecutive pairs; the two expressions of a
+ * pair must produce the same canonical regex.
+ *
+ * @return 0 on success, number of failing pairs otherwise.
+ */
+static unsigned int
+test_proofs_static (void)
+{
+  /* Entries 2k and 2k+1 form one pair that is compared with each other. */
+  static const char *const regex[] = {
+    "a|aa*a",
+    "a+",
+    "a*",
+    "a*a*",
+    "(F*C|WfPf|y+F*C)",
+    "y*F*C|WfPf",
+    "((a|b)c|(a|b)(d|(a|b)e))",
+    "((a|b)(c|d)|(a|b)(a|b)e)"
+  };
+  const unsigned int regex_count = sizeof (regex) / sizeof (regex[0]);
+  unsigned int i;
+  unsigned int error;
+
+  error = 0;
+
+  /* 'i + 1 < regex_count' keeps the second index of a pair in bounds even
+   * if the table were ever left with an odd number of entries. */
+  for (i = 0; i + 1 < regex_count; i += 2)
+  {
+    struct GNUNET_REGEX_Automaton *dfa1;
+    struct GNUNET_REGEX_Automaton *dfa2;
+    const char *canon_rx1;
+    const char *canon_rx2;
+
+    dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 1);
+    dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]), 1);
+    GNUNET_assert (NULL != dfa1);
+    GNUNET_assert (NULL != dfa2);
+
+    canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1);
+    canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2);
+
+    /* Decide per pair. Testing the running 'error' total here would log
+     * every pair after the first failure as failed, even if it matched. */
+    if (0 != strcmp (canon_rx1, canon_rx2))
+    {
+      error++;
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Comparing canonical regex failed:\nrx1:\t%s\ncrx1:\t%s\nrx2:\t%s\ncrx2:\t%s\n",
+                  regex[i], canon_rx1, regex[i + 1], canon_rx2);
+    }
+
+    /* The canonical strings are no longer used once the automata are gone. */
+    GNUNET_REGEX_automaton_destroy (dfa1);
+    GNUNET_REGEX_automaton_destroy (dfa2);
+  }
+
+  return error;
+}
+
+
+/**
+ * Entry point: runs the static and the random canonicalization tests.
+ *
+ * @param argc number of command line arguments (unused).
+ * @param argv command line arguments (unused).
+ *
+ * @return 0 if all tests passed, 1 if at least one test failed.
+ */
+int
+main (int argc, char *argv[])
+{
+  unsigned int error;
+
+  GNUNET_log_setup ("test-regex", "WARNING", NULL);
+
+  error = 0;
+  error += test_proofs_static ();
+  error += test_proofs_random (100, 30);
+
+  /* Do not return the raw failure count: a process exit status only keeps
+   * its low 8 bits, and the automake test driver interprets 77 as "skipped"
+   * and 99 as "hard error", so some counts would be misreported. */
+  return (0 == error) ? 0 : 1;
+}