git.proxmox.com Git - mirror_edk2.git/blob - MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h
MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
[mirror_edk2.git] / MdeModulePkg / Universal / RegularExpressionDxe / Oniguruma / regparse.h
1 #ifndef REGPARSE_H
2 #define REGPARSE_H
3 /**********************************************************************
4 regparse.h - Oniguruma (regular expression library)
5 **********************************************************************/
6 /*-
7 * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "regint.h"
33
/* Sizing constants for node storage.
 * NODE_STRING_BUF_SIZE is the length of the buf[] array embedded in StrNode;
 * NODE_BACKREFS_SIZE is the length of the back_static[] array embedded in
 * BackRefNode.  NODE_STRING_MARGIN is not referenced in this header. */
#define NODE_STRING_MARGIN         16
#define NODE_STRING_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE          6
37
/* node type */
/* Tag stored in the node_type member that begins every node struct in this
 * header.  The values are also used as shift counts by NODE_TYPE2BIT(). */
typedef enum {
  NODE_STRING  =  0,
  NODE_CCLASS  =  1,
  NODE_CTYPE   =  2,
  NODE_BACKREF =  3,
  NODE_QUANT   =  4,
  NODE_BAG     =  5,
  NODE_ANCHOR  =  6,
  NODE_LIST    =  7,
  NODE_ALT     =  8,
  NODE_CALL    =  9,
  NODE_GIMMICK = 10
} NodeType;
52
/* Value of the `type` member of BagNode.
 * The enumerator list ends without a trailing comma, matching NodeType and
 * BodyEmptyType and staying valid for compilers that reject the trailing
 * comma (pedantic C90). */
enum BagType {
  BAG_MEMORY         = 0,
  BAG_OPTION         = 1,
  BAG_STOP_BACKTRACK = 2,
  BAG_IF_ELSE        = 3
};
59
/* Value of the `type` member of GimmickNode.
 * GIMMICK_CALLOUT exists only when USE_CALLOUT is defined; the trailing
 * commas keep the list well-formed in both configurations. */
enum GimmickType {
  GIMMICK_FAIL       = 0,
  GIMMICK_SAVE       = 1,
  GIMMICK_UPDATE_VAR = 2,
#ifdef USE_CALLOUT
  GIMMICK_CALLOUT    = 3,
#endif
};
68
/* Value of the `emptiness` member of QuantNode. */
enum BodyEmptyType {
  BODY_IS_NOT_EMPTY             = 0,
  BODY_IS_EMPTY_POSSIBILITY     = 1,
  BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
  BODY_IS_EMPTY_POSSIBILITY_REC = 3
};
75
/* String node: the common node_type/status header followed by the string
 * bounds, flag bits and an inline buffer of NODE_STRING_BUF_SIZE bytes. */
typedef struct {
  NodeType node_type;
  int status;

  UChar* s;           /* string start; NODE_STRING_LEN() computes end - s */
  UChar* end;
  unsigned int flag;  /* NODE_STRING_* flag bits */
  int    capacity;    /* (allocated size - 1) or 0: use buf[] */
  UChar  buf[NODE_STRING_BUF_SIZE];
} StrNode;
86
/* Character-class node: the common node_type/status header, flag bits, a
 * BitSet and an optional BBuf.  BitSet and BBuf are declared outside this
 * header. */
typedef struct {
  NodeType node_type;
  int status;

  unsigned int flags;
  BitSet bs;
  BBuf*  mbuf;   /* multi-byte info or NULL */
} CClassNode;
95
/* Quantifier node.  `body` is the third member, so it lines up with the
 * `base` view of the Node union that NODE_BODY() reads. */
typedef struct {
  NodeType node_type;
  int status;
  struct _Node* body;

  int lower;
  int upper;
  int greedy;
  enum BodyEmptyType emptiness;
  struct _Node* head_exact;
  struct _Node* next_head_exact;
  int is_refered;     /* includes called nodes; don't eliminate even if {0} */
} QuantNode;
109
/* Bag node.  `type` holds an enum BagType value; the anonymous union that
 * follows provides three alternative member groups (m, o, te).
 * NOTE(review): presumably m/o/te correspond to BAG_MEMORY / BAG_OPTION /
 * BAG_IF_ELSE respectively -- confirm against the users of this struct.
 * `body` is the third member, so it lines up with the `base` view of the
 * Node union that NODE_BODY() reads. */
typedef struct {
  NodeType node_type;
  int status;
  struct _Node* body;

  enum BagType type;
  union {
    struct {
      int regnum;
      AbsAddrType called_addr;
      int entry_count;
      int called_state;
    } m;
    struct {
      OnigOptionType options;
    } o;
    struct {
      /* body is condition */
      struct _Node* Then;
      struct _Node* Else;
    } te;
  };
  /* for multiple call reference */
  OnigLen min_len;   /* min length (byte) */
  OnigLen max_len;   /* max length (byte) */
  int char_len;      /* character length  */
  int opt_count;     /* referenced count in optimize_nodes() */
} BagNode;
138
139 #ifdef USE_CALL
140
/* One element of the array held by UnsetAddrList: an integer offset paired
 * with a target node.  Only declared when USE_CALL is defined. */
typedef struct {
  int           offset;
  struct _Node* target;
} UnsetAddr;
145
/* Array of UnsetAddr entries with two counters.
 * NOTE(review): presumably `num` is the number of entries in use and
 * `alloc` the allocated capacity of `us` -- confirm against the code that
 * maintains the list.  Only declared when USE_CALL is defined. */
typedef struct {
  int        num;
  int        alloc;
  UnsetAddr* us;
} UnsetAddrList;
151
/* Call node (only declared when USE_CALL is defined).  `body` is the third
 * member, so it lines up with the `base` view of the Node union that
 * NODE_BODY() reads. */
typedef struct {
  NodeType node_type;
  int status;
  struct _Node* body; /* to BagNode : BAG_MEMORY */

  int by_number;
  int group_num;
  UChar* name;
  UChar* name_end;
  int entry_count;
} CallNode;
163
164 #endif
165
/* Back-reference node.  The reference array is read through BACKREFS_P(),
 * which yields back_dynamic when it is non-NULL and the inline back_static
 * array (NODE_BACKREFS_SIZE entries) otherwise. */
typedef struct {
  NodeType node_type;
  int status;

  int  back_num;
  int  back_static[NODE_BACKREFS_SIZE];
  int* back_dynamic;
  int  nest_level;
} BackRefNode;
175
/* Anchor node.  `body` is the third member, so it lines up with the `base`
 * view of the Node union that NODE_BODY() reads. */
typedef struct {
  NodeType node_type;
  int status;
  struct _Node* body;

  int type;
  int char_len;
  int ascii_mode;
} AnchorNode;
185
/* Pair node holding two child pointers, read through NODE_CAR() and
 * NODE_CDR().  It is the `cons` member of the Node union. */
typedef struct {
  NodeType node_type;
  int status;

  struct _Node* car;
  struct _Node* cdr;
} ConsAltNode;
193
/* Character-type node.  NODE_IS_ANYCHAR() compares `ctype` with
 * CTYPE_ANYCHAR, and CTYPE_OPTION() reads `options` when the node has the
 * NODE_ST_FIXED_OPTION status bit set. */
typedef struct {
  NodeType node_type;
  int status;

  int ctype;
  int not;
  OnigOptionType options;
  int ascii_mode;
} CtypeNode;
203
/* Gimmick node.  `type` holds an enum GimmickType value. */
typedef struct {
  NodeType node_type;
  int status;

  enum GimmickType type;
  int  detail_type;
  int  num;
  int  id;
} GimmickNode;
213
/* Generic parse-tree node: a union of every concrete node struct.
 * All members start with `NodeType node_type; int status;`, so `base` can
 * read those two fields whatever the concrete type is.  Only QuantNode,
 * BagNode, CallNode and AnchorNode also declare `body` as their third
 * member; `base.body` is meaningful for those types only.
 * The `call` member exists only when USE_CALL is defined. */
typedef struct _Node {
  union {
    struct {
      NodeType node_type;
      int status;
      struct _Node* body;
    } base;

    StrNode       str;
    CClassNode    cclass;
    QuantNode     quant;
    BagNode       bag;
    BackRefNode   backref;
    AnchorNode    anchor;
    ConsAltNode   cons;
    CtypeNode     ctype;
#ifdef USE_CALL
    CallNode      call;
#endif
    GimmickNode   gimmick;
  } u;
} Node;
236
/* Null pointer typed as Node*. */
#define NULL_NODE  ((Node* )0)
238
239
/* node type bit */
/* NODE_TYPE2BIT() maps a NodeType value to a single-bit mask (1 << type);
 * the NODE_BIT_* names below are that mask for each NodeType enumerator. */
#define NODE_TYPE2BIT(type)      (1<<(type))

#define NODE_BIT_STRING     NODE_TYPE2BIT(NODE_STRING)
#define NODE_BIT_CCLASS     NODE_TYPE2BIT(NODE_CCLASS)
#define NODE_BIT_CTYPE      NODE_TYPE2BIT(NODE_CTYPE)
#define NODE_BIT_BACKREF    NODE_TYPE2BIT(NODE_BACKREF)
#define NODE_BIT_QUANT      NODE_TYPE2BIT(NODE_QUANT)
#define NODE_BIT_BAG        NODE_TYPE2BIT(NODE_BAG)
#define NODE_BIT_ANCHOR     NODE_TYPE2BIT(NODE_ANCHOR)
#define NODE_BIT_LIST       NODE_TYPE2BIT(NODE_LIST)
#define NODE_BIT_ALT        NODE_TYPE2BIT(NODE_ALT)
#define NODE_BIT_CALL       NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK    NODE_TYPE2BIT(NODE_GIMMICK)
254
/* Read the type tag of a Node through the common `base` view. */
#define NODE_TYPE(node)             ((node)->u.base.node_type)
/* Store a new type tag in a Node and yield the stored value.
 * The whole assignment is parenthesized so the macro stays a single operand
 * inside a larger expression: without the outer parentheses,
 * `NODE_SET_TYPE(n, t) + 1` would store `t + 1`. */
#define NODE_SET_TYPE(node, ntype)  ((node)->u.base.node_type = (ntype))
257
/* Typed views of a Node*: each macro yields a pointer to the matching
 * member of the Node union.  No check of the node's type tag is made here.
 * CALL_() refers to u.call, which exists only when USE_CALL is defined. */
#define STR_(node)         (&((node)->u.str))
#define CCLASS_(node)      (&((node)->u.cclass))
#define CTYPE_(node)       (&((node)->u.ctype))
#define BACKREF_(node)     (&((node)->u.backref))
#define QUANT_(node)       (&((node)->u.quant))
#define BAG_(node)         (&((node)->u.bag))
#define ANCHOR_(node)      (&((node)->u.anchor))
#define CONS_(node)        (&((node)->u.cons))
#define CALL_(node)        (&((node)->u.call))
#define GIMMICK_(node)     (&((node)->u.gimmick))

/* The two child pointers of a ConsAltNode, addressed through a Node*. */
#define NODE_CAR(node)         (CONS_(node)->car)
#define NODE_CDR(node)         (CONS_(node)->cdr)
271
/* Value of CtypeNode.ctype that NODE_IS_ANYCHAR() tests for.  Parenthesized
 * so the negative constant is always a single operand. */
#define CTYPE_ANYCHAR      (-1)
/* True when `node` is tagged NODE_CTYPE and its ctype is CTYPE_ANYCHAR. */
#define NODE_IS_ANYCHAR(node) \
  (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)

/* Options for a ctype node: the node's own `options` when the
 * NODE_ST_FIXED_OPTION status bit is set, otherwise `reg->options`.
 * `reg` is parenthesized so that arguments such as `&r` or `p + i` are
 * dereferenced as a whole. */
#define CTYPE_OPTION(node, reg) \
  (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : (reg)->options)
278
279
/* Combined anchor masks.  The ANCR_* bits they OR together are defined
 * outside this header. */
#define ANCR_ANYCHAR_INF_MASK  (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
#define ANCR_END_BUF_MASK      (ANCR_END_BUF | ANCR_SEMI_END_BUF)
282
/* Flag bits stored in StrNode.flag. */
#define NODE_STRING_RAW                (1<<0) /* by backslashed number */
#define NODE_STRING_AMBIG              (1<<1)
#define NODE_STRING_GOOD_AMBIG         (1<<2)
#define NODE_STRING_DONT_GET_OPT_INFO  (1<<3)

/* Byte length of a string node (end - s), converted to int.  All macros in
 * this group take a Node* and go through its `str` member. */
#define NODE_STRING_LEN(node) ((int )((node)->u.str.end - (node)->u.str.s))

/* Flag setters / clearer.  Each expansion is wrapped in parentheses so it
 * behaves as one operand: the bare form `flag &= ~BIT` would absorb a
 * following operator, e.g. `NODE_STRING_CLEAR_RAW(n) != 0` would become
 * `flag &= (~BIT != 0)`. */
#define NODE_STRING_SET_RAW(node) \
  ((node)->u.str.flag |= NODE_STRING_RAW)
#define NODE_STRING_CLEAR_RAW(node) \
  ((node)->u.str.flag &= ~NODE_STRING_RAW)
#define NODE_STRING_SET_AMBIG(node) \
  ((node)->u.str.flag |= NODE_STRING_AMBIG)
#define NODE_STRING_SET_GOOD_AMBIG(node) \
  ((node)->u.str.flag |= NODE_STRING_GOOD_AMBIG)
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
  ((node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO)

/* Flag tests: non-zero when the corresponding bit is set. */
#define NODE_STRING_IS_RAW(node) \
  (((node)->u.str.flag & NODE_STRING_RAW) != 0)
#define NODE_STRING_IS_AMBIG(node) \
  (((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
#define NODE_STRING_IS_GOOD_AMBIG(node) \
  (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
  (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
303
/* Reference array of a BackRefNode*: back_dynamic when IS_NOT_NULL() says
 * it is set, otherwise the inline back_static array.  IS_NOT_NULL is
 * defined outside this header. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
306
/* node status bits */
/* Bit masks for the `status` member shared by all node structs.  They are
 * combined with the NODE_STATUS_ADD / NODE_STATUS_REMOVE macros, which
 * paste the part of the name after NODE_ST_. */
#define NODE_ST_MIN_FIXED             (1<<0)
#define NODE_ST_MAX_FIXED             (1<<1)
#define NODE_ST_CLEN_FIXED            (1<<2)
#define NODE_ST_MARK1                 (1<<3)
#define NODE_ST_MARK2                 (1<<4)
#define NODE_ST_STRICT_REAL_REPEAT    (1<<5)
#define NODE_ST_RECURSION             (1<<6)
#define NODE_ST_CALLED                (1<<7)
#define NODE_ST_ADDR_FIXED            (1<<8)
#define NODE_ST_NAMED_GROUP           (1<<9)
#define NODE_ST_IN_REAL_REPEAT        (1<<10) /* STK_REPEAT is nested in stack. */
#define NODE_ST_IN_ZERO_REPEAT        (1<<11) /* (....){0} */
#define NODE_ST_IN_MULTI_ENTRY        (1<<12)
#define NODE_ST_NEST_LEVEL            (1<<13)
#define NODE_ST_BY_NUMBER             (1<<14) /* {n,m} */
#define NODE_ST_BY_NAME               (1<<15) /* backref by name */
#define NODE_ST_BACKREF               (1<<16)
#define NODE_ST_CHECKER               (1<<17)
#define NODE_ST_FIXED_OPTION          (1<<18)
#define NODE_ST_PROHIBIT_RECURSION    (1<<19)
#define NODE_ST_SUPER                 (1<<20)
329
330
/* Status word of a node.  The argument is cast to Node*, so a pointer to
 * any concrete node struct can be passed.  The argument is parenthesized
 * before the cast so that an expression argument (e.g. `p + 1`) is cast as
 * a whole instead of having only its first operand cast. */
#define NODE_STATUS(node)           (((Node* )(node))->u.base.status)
/* Set / clear one status bit; `f` is the bit name without the NODE_ST_
 * prefix, e.g. NODE_STATUS_ADD(node, CALLED). */
#define NODE_STATUS_ADD(node,f)     (NODE_STATUS(node) |= (NODE_ST_ ## f))
#define NODE_STATUS_REMOVE(node,f)  (NODE_STATUS(node) &= ~(NODE_ST_ ## f))

/* Status bit tests: each is non-zero when the named NODE_ST_* bit is set. */
#define NODE_IS_BY_NUMBER(node)       ((NODE_STATUS(node) & NODE_ST_BY_NUMBER)      != 0)
#define NODE_IS_IN_REAL_REPEAT(node)  ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
#define NODE_IS_CALLED(node)          ((NODE_STATUS(node) & NODE_ST_CALLED)         != 0)
#define NODE_IS_IN_MULTI_ENTRY(node)  ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
#define NODE_IS_RECURSION(node)       ((NODE_STATUS(node) & NODE_ST_RECURSION)      != 0)
#define NODE_IS_IN_ZERO_REPEAT(node)  ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
#define NODE_IS_NAMED_GROUP(node)     ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP)    != 0)
#define NODE_IS_ADDR_FIXED(node)      ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED)     != 0)
#define NODE_IS_CLEN_FIXED(node)      ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED)     != 0)
#define NODE_IS_MIN_FIXED(node)       ((NODE_STATUS(node) & NODE_ST_MIN_FIXED)      != 0)
#define NODE_IS_MAX_FIXED(node)       ((NODE_STATUS(node) & NODE_ST_MAX_FIXED)      != 0)
#define NODE_IS_MARK1(node)           ((NODE_STATUS(node) & NODE_ST_MARK1)          != 0)
#define NODE_IS_MARK2(node)           ((NODE_STATUS(node) & NODE_ST_MARK2)          != 0)
#define NODE_IS_NEST_LEVEL(node)      ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL)     != 0)
#define NODE_IS_BY_NAME(node)         ((NODE_STATUS(node) & NODE_ST_BY_NAME)        != 0)
#define NODE_IS_BACKREF(node)         ((NODE_STATUS(node) & NODE_ST_BACKREF)        != 0)
#define NODE_IS_CHECKER(node)         ((NODE_STATUS(node) & NODE_ST_CHECKER)        != 0)
#define NODE_IS_FIXED_OPTION(node)    ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION)   != 0)
#define NODE_IS_SUPER(node)           ((NODE_STATUS(node) & NODE_ST_SUPER)          != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
    ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
#define NODE_IS_STRICT_REAL_REPEAT(node) \
    ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
358
/* Body accessors.  NODE_BODY() takes a Node* and reads the `base` view.
 * The other four expand to (node)->body, so they take a pointer to a
 * struct that has a `body` member directly (QuantNode, BagNode, CallNode,
 * AnchorNode), not a Node*. */
#define NODE_BODY(node)           ((node)->u.base.body)
#define NODE_QUANT_BODY(node)     ((node)->body)
#define NODE_BAG_BODY(node)       ((node)->body)
#define NODE_CALL_BODY(node)      ((node)->body)
#define NODE_ANCHOR_BODY(node)    ((node)->body)
364
/* SCANENV_MEMENV_SIZE is the length of ScanEnv.mem_env_static.
 * SCANENV_MEMENV() yields the MemEnv array of a ScanEnv*: mem_env_dynamic
 * when IS_NOT_NULL() says it is set, otherwise the inline mem_env_static.
 * IS_NOT_NULL is defined outside this header. */
#define SCANENV_MEMENV_SIZE               8
#define SCANENV_MEMENV(senv) \
 (IS_NOT_NULL((senv)->mem_env_dynamic) ? \
    (senv)->mem_env_dynamic : (senv)->mem_env_static)
369
/* Element type of ScanEnv.mem_env_static / mem_env_dynamic: one node
 * pointer.  The two fields under `#if 0` are compiled out. */
typedef struct {
  Node* node;
#if 0
  int in;
  int recursion;
#endif
} MemEnv;
377
/* Element type of ScanEnv.saves.  enum SaveType is declared outside this
 * header. */
typedef struct {
  enum SaveType type;
} SaveItem;
381
/* Scan environment, passed by pointer to onig_parse_tree() and
 * onig_scan_env_set_error_string().  The MemEnv array is reached through
 * SCANENV_MEMENV(), which prefers mem_env_dynamic over the inline
 * mem_env_static when the former is non-NULL.  The unset_addr_list and
 * has_call_zero members exist only when USE_CALL is defined. */
typedef struct {
  OnigOptionType   options;
  OnigCaseFoldType case_fold_flag;
  OnigEncoding     enc;
  OnigSyntaxType*  syntax;
  MemStatusType    capture_history;
  MemStatusType    bt_mem_start;
  MemStatusType    bt_mem_end;
  MemStatusType    backrefed_mem;
  UChar*           pattern;
  UChar*           pattern_end;
  UChar*           error;
  UChar*           error_end;
  regex_t*         reg;       /* for reg->names only */
  int              num_call;
#ifdef USE_CALL
  UnsetAddrList*   unset_addr_list;
  int              has_call_zero;
#endif
  int              num_mem;
  int              num_named;
  int              mem_alloc;
  MemEnv           mem_env_static[SCANENV_MEMENV_SIZE];
  MemEnv*          mem_env_dynamic;
  unsigned int     parse_depth;

  int keep_num;
  int save_num;
  int save_alloc_num;
  SaveItem* saves;
} ScanEnv;
413
414
/* Syntax tests: non-zero when any bit of the mask is set in the `op`,
 * `op2` or `behavior` member of the object `syn` points to. */
#define IS_SYNTAX_OP(syn, opm)    (((syn)->op  & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
418
/* Map entry taken by onig_renumber_name_table(): holds one replacement
 * value.  NOTE(review): presumably an array indexed by the old group
 * number -- confirm against the callers. */
typedef struct {
  int new_val;
} GroupNumRemap;
422
/* Functions declared here and defined elsewhere (their bodies are not
 * visible in this header).  Most prototypes go through the P_(( ))
 * wrapper macro, which is defined outside this header;
 * onig_get_tiny_min_len is declared with a plain prototype. */
extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));

extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void   onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
extern void   onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
extern void   onig_node_conv_to_str_node P_((Node* node, int raw));
extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int    onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void   onig_node_free P_((Node* node));
extern Node*  onig_node_new_bag P_((enum BagType type));
extern Node*  onig_node_new_anchor P_((int type, int ascii_mode));
extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node*  onig_node_new_list P_((Node* left, Node* right));
extern Node*  onig_node_list_add P_((Node* list, Node* x));
extern Node*  onig_node_new_alt P_((Node* left, Node* right));
extern void   onig_node_str_clear P_((Node* node));
extern int    onig_names_free P_((regex_t* reg));
extern int    onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int    onig_free_shared_cclass_table P_((void));
extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
446
/* Declared only when USE_CALLOUT is defined; defined elsewhere. */
#ifdef USE_CALLOUT
extern int onig_global_callout_names_free(void);
#endif
450
/* Declared only when ONIG_DEBUG is defined; defined elsewhere.  FILE must
 * already be declared by the including translation unit -- this header
 * only includes "regint.h". */
#ifdef ONIG_DEBUG
extern int onig_print_names(FILE*, regex_t*);
#endif
454
455 #endif /* REGPARSE_H */