aboutsummaryrefslogtreecommitdiff
path: root/treexpr.h
blob: c424a54c855f70bcc7ad2ff31bf5fe4b75a983ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* treexpr.h - Tree expression language header
 + Copyright (C) 2005 David Barksdale
 +
 +  This library is free software; you can redistribute it and/or
 +  modify it under the terms of the GNU Lesser General Public
 +  License as published by the Free Software Foundation; either
 +  version 2.1 of the License, or (at your option) any later version.
 +
 +  This library is distributed in the hope that it will be useful,
 +  but WITHOUT ANY WARRANTY; without even the implied warranty of
 +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +  Lesser General Public License for more details.
 +
 +  You should have received a copy of the GNU Lesser General Public
 +  License along with this library; if not, write to the Free Software
 +  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef _TREEXPR_H_
#define _TREEXPR_H_

#include <libxml/tree.h>
#include <sys/types.h>
#include "regex.h"

/*
 * State machines
 */

struct state; // forward declaration; the full definition appears later in this header

/* One node in a singly linked list of epsilon transitions
 * (struct state's ep field points at such a list). */
struct epsilon
{
	struct epsilon *next; // next node in the list
	struct state *st; // state this entry refers to (presumably the transition's target -- confirm)
};

// Length of the regmatch_t match[] arrays in struct attribute and struct trans.
// NOTE(review): presumably slot 0 for the whole match plus nine sub-expressions -- confirm.
#define RESUBR	( 10 )

/* One attribute constraint. Entries are chained through next;
 * struct trans's attrs field points at such a list. */
struct attribute
{
	struct attribute *next; // next attribute in the list
	char *name; // name of attribute to match
	regex_t re; // compiled regular expression to match (presumably against the attribute's value -- confirm)
	regmatch_t match[RESUBR]; // match offsets, RESUBR slots
	char *str; // string containing matches (presumably the text the offsets in match[] index into -- confirm)
};

/* A non-epsilon transition: the criteria to match plus the state
 * reached when they are met. Referenced from struct state's tr field. */
struct trans
{
	struct state *st;	// state we transition to upon match
	// --- criteria to match ---
	char *name;			// name of tag to match against
	regex_t re;			// compiled regular expression to match against contents
	regmatch_t match[RESUBR]; // match offsets, RESUBR slots
	char *str;			// string containing matches (presumably the text the offsets in match[] index into -- confirm)
	struct attribute *attrs; // list of attributes to match against (linked through attribute.next)
	struct machine *ptr; // machine to match children
};

/* One state of a machine: at most one regular transition plus a list
 * of epsilon transitions. */
struct state
{
	struct trans *tr; // optional transition (presumably NULL when absent -- confirm)
	struct epsilon *ep;	// list of epsilon transitions (linked through epsilon.next)
	int num; // state number (generated at run time)
	// graph traversal
	struct state *next; // internal list of states for a machine
};

/* A state machine: its start and final states, parse-error information,
 * and buffers used during execution. */
struct machine
{
	struct state *start; // start state
	struct state *final; // final state (yes, only one)
	// parse errors
	// NOTE(review): presumably error is a message and buf the expression text
	// being parsed -- confirm against the parser implementation.
	const char *error, *buf;
	// execution
	int states; // number of states
	int **E; // arrays of bit masks for E function
	// These buffers live here to make repeated execution a little faster:
	// they are allocated on the first execution and then reused.
	int *cur_state; 
	int *next_state;
};

/* Matches */

/* One regular expression match; entries are chained through next
 * (struct match's re field points at such a list). */
struct regex_match
{
	struct regex_match *next; // next regular expression match in the list
	regmatch_t match; // match offsets
	char *str; // string containing match (presumably the text the offsets index into -- confirm)
};

/* One tree match; entries are chained through next. document_process()
 * returns a pointer to this type. */
struct match
{
	struct match *next; // next tree match in the list
	xmlNodePtr node; // root node of tree match
	struct regex_match *re; // list of regular expression matches
};

/* Public functions */

/* Parses the tree expression text in expr; m is an out-parameter for a machine.
 * NOTE(review): the meaning of the returned string (error message vs. position
 * in expr, NULL on success?) and the state of *m on failure are not visible in
 * this header -- confirm against the implementation. */
const char *parse_treexpr( const char *expr, struct machine **m );

/* Frees the machine m.
 * NOTE(review): presumably pairs with parse_treexpr(); whether m may be NULL
 * is not visible here -- confirm. */
void free_machine( struct machine *m );

/* Processes the libxml2 document doc with machine m and returns a pointer to
 * struct match, whose entries are linked through their next field.
 * NOTE(review): the result when nothing matches and who frees the returned
 * list are not visible in this header -- confirm. */
struct match *document_process( struct machine *m, xmlDocPtr doc );

#endif