]> git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.h
1 #ifndef REGPARSE_H
2 #define REGPARSE_H
3 /**********************************************************************
4 regparse.h - Oniguruma (regular expression library)
5 **********************************************************************/
6 /*-
7 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "regint.h"
33
/*
 * Node type tag.  Each node struct in this header starts with a field of
 * this type, and NODE_TYPE2BIT() uses the value as a shift count.
 */
typedef enum {
  NODE_STRING    =  0,
  NODE_CCLASS    =  1,
  NODE_CTYPE     =  2,
  NODE_BACKREF   =  3,
  NODE_QUANT     =  4,
  NODE_ENCLOSURE =  5,
  NODE_ANCHOR    =  6,
  NODE_LIST      =  7,
  NODE_ALT       =  8,
  NODE_CALL      =  9,
  NODE_GIMMICK   = 10
} NodeType;
48
/* Sub-kind of a gimmick node; stored in the `type` field of GimmickNode. */
enum GimmickType {
  GIMMICK_FAIL       = 0,
  GIMMICK_KEEP       = 1,
  GIMMICK_SAVE       = 2,
  GIMMICK_UPDATE_VAR = 3,
#ifdef USE_CALLOUT
  GIMMICK_CALLOUT    = 4,  /* only declared when USE_CALLOUT is defined */
#endif
};
58
59
/* node type bit: one distinct bit per NodeType value, usable in type masks */
#define NODE_TYPE2BIT(type)   (1 << (type))

#define NODE_BIT_STRING       NODE_TYPE2BIT(NODE_STRING)
#define NODE_BIT_CCLASS       NODE_TYPE2BIT(NODE_CCLASS)
#define NODE_BIT_CTYPE        NODE_TYPE2BIT(NODE_CTYPE)
#define NODE_BIT_BACKREF      NODE_TYPE2BIT(NODE_BACKREF)
#define NODE_BIT_QUANT        NODE_TYPE2BIT(NODE_QUANT)
#define NODE_BIT_ENCLOSURE    NODE_TYPE2BIT(NODE_ENCLOSURE)
#define NODE_BIT_ANCHOR       NODE_TYPE2BIT(NODE_ANCHOR)
#define NODE_BIT_LIST         NODE_TYPE2BIT(NODE_LIST)
#define NODE_BIT_ALT          NODE_TYPE2BIT(NODE_ALT)
#define NODE_BIT_CALL         NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK      NODE_TYPE2BIT(NODE_GIMMICK)
74
/* True when the node's type is one of STRING, CCLASS, CTYPE or BACKREF. */
#define NODE_IS_SIMPLE_TYPE(node) \
  ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
    (NODE_BIT_STRING | NODE_BIT_CCLASS | \
     NODE_BIT_CTYPE  | NODE_BIT_BACKREF)) != 0)
78
/* Read the type tag through the `base` view of the node union. */
#define NODE_TYPE(node)             ((node)->u.base.node_type)
/*
 * Store a new type tag.  The assignment is wrapped in parentheses so the
 * macro behaves as one expression: without them a use such as
 * `NODE_SET_TYPE(n, t) & mask` would store `t & mask` instead of `t`.
 */
#define NODE_SET_TYPE(node, ntype)  ((node)->u.base.node_type = (ntype))
81
/*
 * Typed views of a node: each macro yields the address of the matching
 * member of the node's union `u`.
 */
#define STR_(node)          (&((node)->u.str))
#define CCLASS_(node)       (&((node)->u.cclass))
#define CTYPE_(node)        (&((node)->u.ctype))
#define BACKREF_(node)      (&((node)->u.backref))
#define QUANT_(node)        (&((node)->u.quant))
#define ENCLOSURE_(node)    (&((node)->u.enclosure))
#define ANCHOR_(node)       (&((node)->u.anchor))
#define CONS_(node)         (&((node)->u.cons))
#define CALL_(node)         (&((node)->u.call))
#define GIMMICK_(node)      (&((node)->u.gimmick))

/* The two links (car / cdr) held by the `cons` view of a node. */
#define NODE_CAR(node)      (CONS_(node)->car)
#define NODE_CDR(node)      (CONS_(node)->cdr)
95
/*
 * ctype value that NODE_IS_ANYCHAR() tests for.  Parenthesized so the
 * negative literal stays a single operand wherever the macro is expanded.
 */
#define CTYPE_ANYCHAR      (-1)
#define NODE_IS_ANYCHAR(node) \
  (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)

/*
 * Options to use for a CTYPE node: the node's own `options` when its
 * FIXED_OPTION status bit is set, otherwise the options of `reg`.
 * `reg` is parenthesized so that arguments such as `&regex` expand correctly.
 */
#define CTYPE_OPTION(node, reg) \
  (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : (reg)->options)
102
103
/* Combined masks: either of the ANYCHAR_INF anchors / either END_BUF anchor. */
#define ANCHOR_ANYCHAR_INF_MASK \
  (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)
#define ANCHOR_END_BUF_MASK \
  (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
106
/* Kind of an enclosure node; stored in the `type` field of EnclosureNode. */
enum EnclosureType {
  ENCLOSURE_MEMORY         = 0,
  ENCLOSURE_OPTION         = 1,
  ENCLOSURE_STOP_BACKTRACK = 2,
  ENCLOSURE_IF_ELSE        = 3,
};
113
#define NODE_STRING_MARGIN      16
/* length of StrNode.buf[]: sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_STRING_BUF_SIZE    24
/* length of BackRefNode.back_static[] */
#define NODE_BACKREFS_SIZE      6
117
/* Bits kept in the `flag` field of a string node. */
#define NODE_STRING_RAW                (1<<0) /* by backslashed number */
#define NODE_STRING_AMBIG              (1<<1)
#define NODE_STRING_DONT_GET_OPT_INFO  (1<<2)

/* Distance from `s` to `end` of the string node, converted to int. */
#define NODE_STRING_LEN(node) \
  ((int )((node)->u.str.end - (node)->u.str.s))

/*
 * Flag setters / clearer.  Each expansion is fully parenthesized (like
 * NODE_STATUS_ADD in this header) so that it stays one expression when
 * combined with other operators at the call site.
 */
#define NODE_STRING_SET_RAW(node) \
  ((node)->u.str.flag |= NODE_STRING_RAW)
#define NODE_STRING_CLEAR_RAW(node) \
  ((node)->u.str.flag &= ~NODE_STRING_RAW)
#define NODE_STRING_SET_AMBIG(node) \
  ((node)->u.str.flag |= NODE_STRING_AMBIG)
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
  ((node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO)

/* Flag predicates: non-zero when the corresponding bit is set. */
#define NODE_STRING_IS_RAW(node) \
  (((node)->u.str.flag & NODE_STRING_RAW) != 0)
#define NODE_STRING_IS_AMBIG(node) \
  (((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
  (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
134
/*
 * Back-reference number array of a BackRefNode: back_dynamic when
 * IS_NOT_NULL() accepts it, otherwise the embedded back_static array.
 */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) \
     ? (br)->back_dynamic          \
     : (br)->back_static)
137
/* Value type of QuantNode.body_empty_info. */
enum QuantBodyEmpty {
  QUANT_BODY_IS_NOT_EMPTY = 0,
  QUANT_BODY_IS_EMPTY     = 1,
  QUANT_BODY_IS_EMPTY_MEM = 2,
  QUANT_BODY_IS_EMPTY_REC = 3
};
144
/* node status bits (kept in the `status` field read by NODE_STATUS()) */
#define NODE_ST_MIN_FIXED              (1 <<  0)
#define NODE_ST_MAX_FIXED              (1 <<  1)
#define NODE_ST_CLEN_FIXED             (1 <<  2)
#define NODE_ST_MARK1                  (1 <<  3)
#define NODE_ST_MARK2                  (1 <<  4)
#define NODE_ST_STOP_BT_SIMPLE_REPEAT  (1 <<  5)
#define NODE_ST_RECURSION              (1 <<  6)
#define NODE_ST_CALLED                 (1 <<  7)
#define NODE_ST_ADDR_FIXED             (1 <<  8)
#define NODE_ST_NAMED_GROUP            (1 <<  9)
#define NODE_ST_IN_REAL_REPEAT         (1 << 10)  /* STK_REPEAT is nested in stack. */
#define NODE_ST_IN_ZERO_REPEAT         (1 << 11)  /* (....){0} */
#define NODE_ST_IN_MULTI_ENTRY         (1 << 12)
#define NODE_ST_NEST_LEVEL             (1 << 13)
#define NODE_ST_BY_NUMBER              (1 << 14)  /* {n,m} */
#define NODE_ST_BY_NAME                (1 << 15)  /* backref by name */
#define NODE_ST_BACKREF                (1 << 16)
#define NODE_ST_CHECKER                (1 << 17)
#define NODE_ST_FIXED_OPTION           (1 << 18)
#define NODE_ST_PROHIBIT_RECURSION     (1 << 19)
#define NODE_ST_SUPER                  (1 << 20)
167
168
/*
 * Status word of a node, reached through a cast to Node*.  The argument is
 * parenthesized before the cast so an expression argument (for example
 * `p + offset`) is evaluated in its own type first and only then viewed as
 * a Node; otherwise the cast would bind to `p` alone.
 */
#define NODE_STATUS(node)           (((Node* )(node))->u.base.status)
/* Set / clear one status bit; `f` is the NODE_ST_ suffix, e.g. CALLED. */
#define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (NODE_ST_ ## f))
#define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
172
/*
 * Status predicates, listed in NODE_ST_* bit order.  Each one is non-zero
 * when the corresponding status bit of the node is set.
 */
#define NODE_IS_MIN_FIXED(node) \
  ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
#define NODE_IS_MAX_FIXED(node) \
  ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
#define NODE_IS_CLEN_FIXED(node) \
  ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
#define NODE_IS_MARK1(node) \
  ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
#define NODE_IS_MARK2(node) \
  ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
  ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
#define NODE_IS_RECURSION(node) \
  ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
#define NODE_IS_CALLED(node) \
  ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
#define NODE_IS_ADDR_FIXED(node) \
  ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
#define NODE_IS_NAMED_GROUP(node) \
  ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
#define NODE_IS_IN_REAL_REPEAT(node) \
  ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
#define NODE_IS_IN_ZERO_REPEAT(node) \
  ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
#define NODE_IS_IN_MULTI_ENTRY(node) \
  ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
#define NODE_IS_NEST_LEVEL(node) \
  ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
#define NODE_IS_BY_NUMBER(node) \
  ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
#define NODE_IS_BY_NAME(node) \
  ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
#define NODE_IS_BACKREF(node) \
  ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
#define NODE_IS_CHECKER(node) \
  ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
#define NODE_IS_FIXED_OPTION(node) \
  ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
  ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
#define NODE_IS_SUPER(node) \
  ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
196
/*
 * Body accessors.  NODE_BODY() goes through the `base` view of a node;
 * the others read `->body` directly from whatever pointer they are given.
 */
#define NODE_BODY(node)             ((node)->u.base.body)
#define NODE_QUANT_BODY(node)       ((node)->body)
#define NODE_ENCLOSURE_BODY(node)   ((node)->body)
#define NODE_CALL_BODY(node)        ((node)->body)
#define NODE_ANCHOR_BODY(node)      ((node)->body)
202
203
204 typedef struct {
205 NodeType node_type;
206 int status;
207
208 UChar* s;
209 UChar* end;
210 unsigned int flag;
211 int capa; /* (allocated size - 1) or 0: use buf[] */
212 UChar buf[NODE_STRING_BUF_SIZE];
213 } StrNode;
214
215 typedef struct {
216 NodeType node_type;
217 int status;
218
219 unsigned int flags;
220 BitSet bs;
221 BBuf* mbuf; /* multi-byte info or NULL */
222 } CClassNode;
223
224 typedef struct {
225 NodeType node_type;
226 int status;
227 struct _Node* body;
228
229 int lower;
230 int upper;
231 int greedy;
232 enum QuantBodyEmpty body_empty_info;
233 struct _Node* head_exact;
234 struct _Node* next_head_exact;
235 int is_refered; /* include called node. don't eliminate even if {0} */
236 } QuantNode;
237
238 typedef struct {
239 NodeType node_type;
240 int status;
241 struct _Node* body;
242
243 enum EnclosureType type;
244 union {
245 struct {
246 int regnum;
247 AbsAddrType called_addr;
248 int entry_count;
249 int called_state;
250 } m;
251 struct {
252 OnigOptionType options;
253 } o;
254 struct {
255 /* body is condition */
256 struct _Node* Then;
257 struct _Node* Else;
258 } te;
259 };
260 /* for multiple call reference */
261 OnigLen min_len; /* min length (byte) */
262 OnigLen max_len; /* max length (byte) */
263 int char_len; /* character length */
264 int opt_count; /* referenced count in optimize_nodes() */
265 } EnclosureNode;
266
267 #ifdef USE_CALL
268
/* One entry of an UnsetAddrList: an int offset paired with a node pointer. */
typedef struct {
  int            offset;
  struct _Node  *target;
} UnsetAddr;

/* Array of UnsetAddr entries (`us`) together with two int counters. */
typedef struct {
  int         num;
  int         alloc;
  UnsetAddr  *us;
} UnsetAddrList;
279
280 typedef struct {
281 NodeType node_type;
282 int status;
283 struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
284
285 int by_number;
286 int group_num;
287 UChar* name;
288 UChar* name_end;
289 int entry_count;
290 } CallNode;
291
292 #endif
293
294 typedef struct {
295 NodeType node_type;
296 int status;
297
298 int back_num;
299 int back_static[NODE_BACKREFS_SIZE];
300 int* back_dynamic;
301 int nest_level;
302 } BackRefNode;
303
304 typedef struct {
305 NodeType node_type;
306 int status;
307 struct _Node* body;
308
309 int type;
310 int char_len;
311 int ascii_mode;
312 } AnchorNode;
313
314 typedef struct {
315 NodeType node_type;
316 int status;
317
318 struct _Node* car;
319 struct _Node* cdr;
320 } ConsAltNode;
321
322 typedef struct {
323 NodeType node_type;
324 int status;
325
326 int ctype;
327 int not;
328 OnigOptionType options;
329 int ascii_mode;
330 } CtypeNode;
331
332 typedef struct {
333 NodeType node_type;
334 int status;
335
336 enum GimmickType type;
337 int detail_type;
338 int num;
339 int id;
340 } GimmickNode;
341
342 typedef struct _Node {
343 union {
344 struct {
345 NodeType node_type;
346 int status;
347 struct _Node* body;
348 } base;
349
350 StrNode str;
351 CClassNode cclass;
352 QuantNode quant;
353 EnclosureNode enclosure;
354 BackRefNode backref;
355 AnchorNode anchor;
356 ConsAltNode cons;
357 CtypeNode ctype;
358 #ifdef USE_CALL
359 CallNode call;
360 #endif
361 GimmickNode gimmick;
362 } u;
363 } Node;
364
365
/* Null pointer typed as Node*. */
#define NULL_NODE              ((Node* )0)

/* Length of ScanEnv.mem_env_static[]. */
#define SCANENV_MEMENV_SIZE    8
/*
 * MemEnv array of a ScanEnv: mem_env_dynamic when IS_NOT_NULL() accepts it,
 * otherwise the embedded mem_env_static array.
 */
#define SCANENV_MEMENV(senv) \
  (IS_NOT_NULL((senv)->mem_env_dynamic) \
     ? (senv)->mem_env_dynamic          \
     : (senv)->mem_env_static)
372
373 typedef struct {
374 Node* node;
375 #if 0
376 int in;
377 int recursion;
378 #endif
379 } MemEnv;
380
381 typedef struct {
382 enum SaveType type;
383 } SaveItem;
384
385 typedef struct {
386 OnigOptionType options;
387 OnigCaseFoldType case_fold_flag;
388 OnigEncoding enc;
389 OnigSyntaxType* syntax;
390 MemStatusType capture_history;
391 MemStatusType bt_mem_start;
392 MemStatusType bt_mem_end;
393 MemStatusType backrefed_mem;
394 UChar* pattern;
395 UChar* pattern_end;
396 UChar* error;
397 UChar* error_end;
398 regex_t* reg; /* for reg->names only */
399 int num_call;
400 #ifdef USE_CALL
401 UnsetAddrList* unset_addr_list;
402 int has_call_zero;
403 #endif
404 int num_mem;
405 int num_named;
406 int mem_alloc;
407 MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
408 MemEnv* mem_env_dynamic;
409 unsigned int parse_depth;
410
411 int keep_num;
412 int save_num;
413 int save_alloc_num;
414 SaveItem* saves;
415 } ScanEnv;
416
417
/*
 * Syntax feature tests: non-zero when any bit of the mask is set in the
 * `op`, `op2` or `behavior` field of the syntax object, respectively.
 */
#define IS_SYNTAX_OP(syn, opm)     (((syn)->op       & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm)    (((syn)->op2      & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm)     (((syn)->behavior & (bvm)) != 0)
421
/* Element of the map passed to onig_renumber_name_table(). */
typedef struct {
  int  new_val;
} GroupNumRemap;
425
426 extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
427
428 extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
429 extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
430 extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
431 extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
432 extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
433 extern void onig_node_conv_to_str_node P_((Node* node, int raw));
434 extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
435 extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
436 extern void onig_node_free P_((Node* node));
437 extern Node* onig_node_new_enclosure P_((int type));
438 extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
439 extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
440 extern Node* onig_node_new_list P_((Node* left, Node* right));
441 extern Node* onig_node_list_add P_((Node* list, Node* x));
442 extern Node* onig_node_new_alt P_((Node* left, Node* right));
443 extern void onig_node_str_clear P_((Node* node));
444 extern int onig_names_free P_((regex_t* reg));
445 extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
446 extern int onig_free_shared_cclass_table P_((void));
447 extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
448 extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
449
450 #ifdef USE_CALLOUT
451 extern int onig_global_callout_names_free(void);
452 #endif
453
454 #ifdef ONIG_DEBUG
455 extern int onig_print_names(FILE*, regex_t*);
456 #endif
457
458 #endif /* REGPARSE_H */