1 /**********************************************************************
2 regsyntax.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * OnigSyntaxASIS
 * The only syntax flag visible for this definition is
 * ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
 * NOTE(review): the name suggests patterns are taken literally ("as is");
 * confirm against the definition of ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
 */
32 OnigSyntaxType OnigSyntaxASIS
= {
34 , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
39 (OnigCodePoint
)'\\' /* esc */
40 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
41 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
42 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
43 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
44 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPosixBasic
 * Operators: SYN_POSIX_COMMON_OP plus backslash-escaped parentheses for
 * subexpressions and backslash-escaped braces for intervals.
 * Options: ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE.
 */
48 OnigSyntaxType OnigSyntaxPosixBasic
= {
/* ONIG_SYN_OP_* operator flags */
49 ( SYN_POSIX_COMMON_OP
| ONIG_SYN_OP_ESC_LPAREN_SUBEXP
|
50 ONIG_SYN_OP_ESC_BRACE_INTERVAL
)
/* ONIG_OPTION_* flags */
53 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_MULTILINE
)
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
56 (OnigCodePoint
)'\\' /* esc */
57 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
58 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
59 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
60 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
61 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPosixExtended
 * Operators: SYN_POSIX_COMMON_OP plus unescaped parentheses, braces,
 * '+', '?' and '|' (subexpression, interval, one-or-more, zero-or-one,
 * alternation).
 * Behavior: context-independent anchors and repeat operators, invalid
 * repeat-operator contexts, unmatched close parenthesis allowed, double
 * range operator allowed in character classes.
 * Options: ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE.
 */
65 OnigSyntaxType OnigSyntaxPosixExtended
= {
/* ONIG_SYN_OP_* operator flags */
66 ( SYN_POSIX_COMMON_OP
| ONIG_SYN_OP_LPAREN_SUBEXP
|
67 ONIG_SYN_OP_BRACE_INTERVAL
|
68 ONIG_SYN_OP_PLUS_ONE_INF
| ONIG_SYN_OP_QMARK_ZERO_ONE
| ONIG_SYN_OP_VBAR_ALT
)
/* syntax behavior flags */
70 , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS
|
71 ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
| ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
|
72 ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
|
73 ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
)
/* ONIG_OPTION_* flags */
74 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_MULTILINE
)
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
77 (OnigCodePoint
)'\\' /* esc */
78 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
79 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
80 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
81 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
82 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxEmacs
 * Operators: '.', bracket character classes, backslash-escaped braces,
 * parentheses and '|', unescaped '*', '+' and '?', decimal back
 * references, line anchors and escaped control characters.
 * Additional flags: ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR and
 * ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC.
 */
86 OnigSyntaxType OnigSyntaxEmacs
= {
/* ONIG_SYN_OP_* operator flags */
87 ( ONIG_SYN_OP_DOT_ANYCHAR
| ONIG_SYN_OP_BRACKET_CC
|
88 ONIG_SYN_OP_ESC_BRACE_INTERVAL
|
89 ONIG_SYN_OP_ESC_LPAREN_SUBEXP
| ONIG_SYN_OP_ESC_VBAR_ALT
|
90 ONIG_SYN_OP_ASTERISK_ZERO_INF
| ONIG_SYN_OP_PLUS_ONE_INF
|
91 ONIG_SYN_OP_QMARK_ZERO_ONE
| ONIG_SYN_OP_DECIMAL_BACKREF
|
92 ONIG_SYN_OP_LINE_ANCHOR
| ONIG_SYN_OP_ESC_CONTROL_CHARS
)
/* ONIG_SYN_OP2_* operator flag */
93 , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
/* syntax behavior flag */
94 , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
98 (OnigCodePoint
)'\\' /* esc */
99 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
100 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
101 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
102 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
103 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxGrep
 * Operators: '.', bracket character classes and POSIX bracket
 * expressions, backslash-escaped braces, parentheses, '|', '+' and '?',
 * unescaped '*', line anchors, the W-word, B-word-bound and
 * LTGT-word-begin/end escapes, and decimal back references.
 * Behavior: empty ranges allowed in character classes; newline excluded
 * from negated character classes.
 */
107 OnigSyntaxType OnigSyntaxGrep
= {
/* ONIG_SYN_OP_* operator flags */
108 ( ONIG_SYN_OP_DOT_ANYCHAR
| ONIG_SYN_OP_BRACKET_CC
| ONIG_SYN_OP_POSIX_BRACKET
|
109 ONIG_SYN_OP_ESC_BRACE_INTERVAL
| ONIG_SYN_OP_ESC_LPAREN_SUBEXP
|
110 ONIG_SYN_OP_ESC_VBAR_ALT
|
111 ONIG_SYN_OP_ASTERISK_ZERO_INF
| ONIG_SYN_OP_ESC_PLUS_ONE_INF
|
112 ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
| ONIG_SYN_OP_LINE_ANCHOR
|
113 ONIG_SYN_OP_ESC_W_WORD
| ONIG_SYN_OP_ESC_B_WORD_BOUND
|
114 ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
| ONIG_SYN_OP_DECIMAL_BACKREF
)
/* syntax behavior flags */
116 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
| ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
)
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
120 (OnigCodePoint
)'\\' /* esc */
121 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
122 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
123 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
124 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
125 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxGnuRegex
 * NOTE(review): presumably initialized from SYN_GNU_REGEX_OP and
 * SYN_GNU_REGEX_BV, the macros the Java and Perl definitions in this file
 * build on; confirm against the flag fields of this initializer.
 */
129 OnigSyntaxType OnigSyntaxGnuRegex
= {
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
136 (OnigCodePoint
)'\\' /* esc */
137 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
138 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
139 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
140 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
141 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxJava
 * Operators: SYN_GNU_REGEX_OP extended with the non-greedy '?', control
 * character, C-control, three-digit octal and two-digit hex escapes, with
 * ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out of the result.
 * Behavior: SYN_GNU_REGEX_BV plus different-length alternatives in
 * look-behind.
 * Options: ONIG_OPTION_SINGLELINE.
 */
145 OnigSyntaxType OnigSyntaxJava
= {
/* ONIG_SYN_OP_* operator flags; the trailing "& ~" clears the
   LTGT word begin/end escape bit from the combined set. */
146 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
147 ONIG_SYN_OP_ESC_CONTROL_CHARS
| ONIG_SYN_OP_ESC_C_CONTROL
|
148 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
)
149 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* operator flags */
150 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
| ONIG_SYN_OP2_QMARK_GROUP_EFFECT
|
151 ONIG_SYN_OP2_OPTION_PERL
| ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
|
152 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
| ONIG_SYN_OP2_CCLASS_SET_OP
|
153 ONIG_SYN_OP2_ESC_V_VTAB
| ONIG_SYN_OP2_ESC_U_HEX4
|
154 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
)
/* syntax behavior flags */
155 , ( SYN_GNU_REGEX_BV
| ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
)
/* ONIG_OPTION_* flags */
156 , ONIG_OPTION_SINGLELINE
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
159 (OnigCodePoint
)'\\' /* esc */
160 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
161 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
162 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
163 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
164 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPerl
 * Operators: SYN_GNU_REGEX_OP extended with the non-greedy '?',
 * three-digit octal, two-digit hex, braced hex, braced octal, control
 * character and C-control escapes, with
 * ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out of the result.
 * Options: ONIG_OPTION_SINGLELINE.
 */
168 OnigSyntaxType OnigSyntaxPerl
= {
/* ONIG_SYN_OP_* operator flags; the trailing "& ~" clears the
   LTGT word begin/end escape bit from the combined set. */
169 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
170 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
171 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_O_BRACE_OCTAL
|
172 ONIG_SYN_OP_ESC_CONTROL_CHARS
|
173 ONIG_SYN_OP_ESC_C_CONTROL
)
174 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* operator flags */
175 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
176 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
177 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE
|
178 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP
|
179 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS
|
180 ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME
|
181 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER
|
182 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
183 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
184 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
|
185 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE
|
186 ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT
)
/* ONIG_OPTION_* flags */
188 , ONIG_OPTION_SINGLELINE
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
191 (OnigCodePoint
)'\\' /* esc */
192 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
193 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
194 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
195 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
196 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPerl_NG
 * Same ONIG_SYN_OP_* operator set as OnigSyntaxPerl. The ONIG_SYN_OP2_*
 * set additionally lists the named-group, K-named-backref, G-subexp-call
 * and Perl-subexp-call flags. Behavior is SYN_GNU_REGEX_BV plus
 * capture-only-named-group and multiplex-definition-name.
 * Options: ONIG_OPTION_SINGLELINE.
 */
200 /* Perl + named group */
201 OnigSyntaxType OnigSyntaxPerl_NG
= {
/* ONIG_SYN_OP_* operator flags; the trailing "& ~" clears the
   LTGT word begin/end escape bit from the combined set. */
202 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
203 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
204 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_O_BRACE_OCTAL
|
205 ONIG_SYN_OP_ESC_CONTROL_CHARS
|
206 ONIG_SYN_OP_ESC_C_CONTROL
)
207 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* operator flags */
208 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
209 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
210 ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE
|
211 ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP
|
212 ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS
|
213 ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME
|
214 ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER
|
215 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
216 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
217 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
|
218 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
|
219 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
|
220 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
|
221 ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE
|
222 ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT
|
223 ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL
)
/* syntax behavior flags */
224 , ( SYN_GNU_REGEX_BV
|
225 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
|
226 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
)
/* ONIG_OPTION_* flags */
227 , ONIG_OPTION_SINGLELINE
/* Meta characters: backslash is the escape character; the remaining five
   entries are set to ONIG_INEFFECTIVE_META_CHAR. */
230 (OnigCodePoint
)'\\' /* esc */
231 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
232 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
233 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
234 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
235 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * Store `syntax` in the global OnigDefaultSyntax.
 * The parameter can be replaced by ONIG_SYNTAX_ONIGURUMA before the store.
 * NOTE(review): presumably that replacement applies only when the caller
 * passes a null pointer; confirm the guarding condition.
 */
242 onig_set_default_syntax(OnigSyntaxType
* syntax
)
/* fall back to the Oniguruma syntax */
245 syntax
= ONIG_SYNTAX_ONIGURUMA
;
/* publish the chosen syntax as the library-wide default */
247 OnigDefaultSyntax
= syntax
;
/*
 * Takes a destination (`to`) and a source (`from`) syntax object.
 * NOTE(review): presumably copies the contents of *from into *to;
 * confirm against the function body.
 */
252 onig_copy_syntax(OnigSyntaxType
* to
, OnigSyntaxType
* from
)
/*
 * Takes a syntax object and an unsigned `op` value.
 * NOTE(review): presumably stores `op` as the syntax's ONIG_SYN_OP_*
 * flag word; confirm against the function body.
 */
258 onig_set_syntax_op(OnigSyntaxType
* syntax
, unsigned int op
)
/*
 * Takes a syntax object and an unsigned `op2` value.
 * NOTE(review): presumably stores `op2` as the syntax's ONIG_SYN_OP2_*
 * flag word; confirm against the function body.
 */
264 onig_set_syntax_op2(OnigSyntaxType
* syntax
, unsigned int op2
)
/*
 * Overwrite the `behavior` field of `syntax` with `behavior`.
 * The previous value is replaced, not OR-ed. `syntax` is dereferenced
 * without a null check.
 */
270 onig_set_syntax_behavior(OnigSyntaxType
* syntax
, unsigned int behavior
)
272 syntax
->behavior
= behavior
;
/*
 * Overwrite the `options` field of `syntax` with `options`.
 * The previous value is replaced, not OR-ed. `syntax` is dereferenced
 * without a null check.
 */
276 onig_set_syntax_options(OnigSyntaxType
* syntax
, OnigOptionType options
)
278 syntax
->options
= options
;
/*
 * Takes a syntax object.
 * NOTE(review): presumably returns the syntax's ONIG_SYN_OP_* flag word;
 * confirm against the function body.
 */
282 onig_get_syntax_op(OnigSyntaxType
* syntax
)
/*
 * Takes a syntax object.
 * NOTE(review): presumably returns the syntax's ONIG_SYN_OP2_* flag word;
 * confirm against the function body.
 */
288 onig_get_syntax_op2(OnigSyntaxType
* syntax
)
/*
 * Return the `behavior` field of `syntax`.
 * `syntax` is dereferenced without a null check.
 */
294 onig_get_syntax_behavior(OnigSyntaxType
* syntax
)
296 return syntax
->behavior
;
/*
 * Return the `options` field of `syntax` as an OnigOptionType.
 * `syntax` is dereferenced without a null check.
 */
299 extern OnigOptionType
300 onig_get_syntax_options(OnigSyntaxType
* syntax
)
302 return syntax
->options
;
305 #ifdef USE_VARIABLE_META_CHARS
/*
 * Store `code` in the meta_char_table entry selected by `what`
 * (one of the ONIG_META_CHAR_* selectors handled below).
 * The first parameter is named `enc` although its type is OnigSyntaxType*.
 * Compiled only when USE_VARIABLE_META_CHARS is defined.
 * Returns an int; ONIGERR_INVALID_ARGUMENT is one possible result.
 * NOTE(review): presumably that error is returned for a `what` value not
 * handled here and a success code otherwise; confirm.
 */
306 extern int onig_set_meta_char(OnigSyntaxType
* enc
,
307 unsigned int what
, OnigCodePoint code
)
/* escape character */
310 case ONIG_META_CHAR_ESCAPE
:
311 enc
->meta_char_table
.esc
= code
;
/* any single character */
313 case ONIG_META_CHAR_ANYCHAR
:
314 enc
->meta_char_table
.anychar
= code
;
/* "anytime" repeat */
316 case ONIG_META_CHAR_ANYTIME
:
317 enc
->meta_char_table
.anytime
= code
;
/* zero-or-one repeat */
319 case ONIG_META_CHAR_ZERO_OR_ONE_TIME
:
320 enc
->meta_char_table
.zero_or_one_time
= code
;
/* one-or-more repeat */
322 case ONIG_META_CHAR_ONE_OR_MORE_TIME
:
323 enc
->meta_char_table
.one_or_more_time
= code
;
/* any character, "anytime" repeat */
325 case ONIG_META_CHAR_ANYCHAR_ANYTIME
:
326 enc
->meta_char_table
.anychar_anytime
= code
;
/* error result */
329 return ONIGERR_INVALID_ARGUMENT
;
334 #endif /* USE_VARIABLE_META_CHARS */