1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
/* treexpr.h - Tree expression language header
+ Copyright (C) 2005 David Barksdale
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _TREEXPR_H_
#define _TREEXPR_H_
#include <libxml/tree.h>
#include <sys/types.h>
#include "regex.h"
/*
* State machines
*/
struct state;
struct epsilon
{
struct epsilon *next;
struct state *st;
};
#define RESUBR ( 10 )
struct attribute
{
struct attribute *next;
char *name; // name of attribute to match
regex_t re; // compiled regular expression to match
regmatch_t match[RESUBR]; // matches
char *str; // string containing matches
};
struct trans
{
struct state *st; // state we transition to upon match
// stuff to match
char *name; // name of tag to match against
regex_t re; // compiled regular expression to match against contents
regmatch_t match[RESUBR]; // matches
char *str; // string containing matches
struct attribute *attrs; // attributes to match against
struct machine *ptr; // machine to match children
};
struct state
{
struct trans *tr; // optional transition
struct epsilon *ep; // list of epsilon transitions
int num; // state number (generated at run time)
// graph traversal
struct state *next; // internal list of states for a machine
};
struct machine
{
struct state *start; // start state
struct state *final; // final state (yes, only one)
// parse errors
const char *error, *buf;
// execution
int states; // number of states
int **E; // arrays of bit masks for E function
// I moved these here to make repeated execution a little faster
// we alloc these buffers on the first execution and then reuse them
int *cur_state;
int *next_state;
};
/* Matches */
struct regex_match
{
struct regex_match *next;
regmatch_t match; // match
char *str; // string containing match
};
struct match
{
struct match *next;
xmlNodePtr node; // root node of tree match
struct regex_match *re; // list of regular expression matches
};
/* Public functions */
const char *parse_treexpr( const char *expr, struct machine **m );
void free_machine( struct machine *m );
struct match *document_process( struct machine *m, xmlDocPtr doc );
#endif
|