git.proxmox.com Git - ovs.git/blob - ovn/lib/lex.c
2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 #include "dynamic-string.h"
26 /* Returns a string that represents 'format'. */
28 lex_format_to_string(enum lex_format format
)
33 case LEX_F_HEXADECIMAL
:
46 /* Initializes 'token'. */
48 lex_token_init(struct lex_token
*token
)
50 token
->type
= LEX_T_END
;
54 /* Frees memory owned by 'token'. */
56 lex_token_destroy(struct lex_token
*token
)
61 /* Exchanges 'a' and 'b'. */
63 lex_token_swap(struct lex_token
*a
, struct lex_token
*b
)
65 struct lex_token tmp
= *a
;
70 /* lex_token_format(). */
73 lex_token_n_zeros(enum lex_format format
)
76 case LEX_F_DECIMAL
: return offsetof(union mf_subvalue
, integer
);
77 case LEX_F_HEXADECIMAL
: return 0;
78 case LEX_F_IPV4
: return offsetof(union mf_subvalue
, ipv4
);
79 case LEX_F_IPV6
: return offsetof(union mf_subvalue
, ipv6
);
80 case LEX_F_ETHERNET
: return offsetof(union mf_subvalue
, mac
);
81 default: OVS_NOT_REACHED();
85 /* Returns the effective format for 'token', that is, the format in which it
86 * should actually be printed. This is ordinarily the same as 'token->format',
87 * but it's always possible that someone sets up a token with a format that
88 * won't work for a value, e.g. 'token->value' is wider than 32 bits but the
89 * format is LEX_F_IPV4. (The lexer itself won't do that; this is an attempt
90 * to avoid confusion in the future.) */
91 static enum lex_format
92 lex_token_get_format(const struct lex_token
*token
)
94 size_t n_zeros
= lex_token_n_zeros(token
->format
);
95 return (is_all_zeros(&token
->value
, n_zeros
)
96 && (token
->type
!= LEX_T_MASKED_INTEGER
97 || is_all_zeros(&token
->mask
, n_zeros
))
103 lex_token_format_value(const union mf_subvalue
*value
,
104 enum lex_format format
, struct ds
*s
)
108 ds_put_format(s
, "%"PRIu64
, ntohll(value
->integer
));
111 case LEX_F_HEXADECIMAL
:
112 mf_format_subvalue(value
, s
);
116 ds_put_format(s
, IP_FMT
, IP_ARGS(value
->ipv4
));
120 print_ipv6_addr(s
, &value
->ipv6
);
124 ds_put_format(s
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(value
->mac
));
134 lex_token_format_masked_integer(const struct lex_token
*token
, struct ds
*s
)
136 enum lex_format format
= lex_token_get_format(token
);
138 lex_token_format_value(&token
->value
, format
, s
);
141 const union mf_subvalue
*mask
= &token
->mask
;
142 if (format
== LEX_F_IPV4
&& ip_is_cidr(mask
->ipv4
)) {
143 ds_put_format(s
, "%d", ip_count_cidr_bits(mask
->ipv4
));
144 } else if (token
->format
== LEX_F_IPV6
&& ipv6_is_cidr(&mask
->ipv6
)) {
145 ds_put_format(s
, "%d", ipv6_count_cidr_bits(&mask
->ipv6
));
147 lex_token_format_value(&token
->mask
, format
, s
);
151 /* Appends a string representation of 'token' to 's', in a format that can be
152 * losslessly parsed back by the lexer. (LEX_T_END and LEX_T_ERROR can't be
153 * parsed back.) */
155 lex_token_format(const struct lex_token
*token
, struct ds
*s
)
157 switch (token
->type
) {
163 ds_put_cstr(s
, token
->s
);
167 ds_put_cstr(s
, "error(");
168 json_string_escape(token
->s
, s
);
173 json_string_escape(token
->s
, s
);
177 lex_token_format_value(&token
->value
, lex_token_get_format(token
), s
);
180 case LEX_T_MASKED_INTEGER
:
181 lex_token_format_masked_integer(token
, s
);
203 ds_put_cstr(s
, "==");
206 ds_put_cstr(s
, "!=");
212 ds_put_cstr(s
, "<=");
218 ds_put_cstr(s
, ">=");
224 ds_put_cstr(s
, "&&");
227 ds_put_cstr(s
, "||");
230 ds_put_cstr(s
, "..");
235 case LEX_T_SEMICOLON
:
247 /* lex_token_parse(). */
249 static void OVS_PRINTF_FORMAT(2, 3)
250 lex_error(struct lex_token
*token
, const char *message
, ...)
252 ovs_assert(!token
->s
);
253 token
->type
= LEX_T_ERROR
;
256 va_start(args
, message
);
257 token
->s
= xvasprintf(message
, args
);
262 lex_parse_hex_integer(const char *start
, size_t len
, struct lex_token
*token
)
264 const char *in
= start
+ (len
- 1);
265 uint8_t *out
= token
->value
.u8
+ (sizeof token
->value
.u8
- 1);
267 for (int i
= 0; i
< len
; i
++) {
268 int hexit
= hexit_value(in
[-i
]);
270 lex_error(token
, "Invalid syntax in hexadecimal constant.");
273 if (hexit
&& i
/ 2 >= sizeof token
->value
.u8
) {
274 lex_error(token
, "Hexadecimal constant requires more than "
275 "%"PRIuSIZE
" bits.", 8 * sizeof token
->value
.u8
);
278 out
[-(i
/ 2)] |= i
% 2 ? hexit
<< 4 : hexit
;
280 token
->format
= LEX_F_HEXADECIMAL
;
284 lex_parse_integer__(const char *p
, struct lex_token
*token
)
286 lex_token_init(token
);
287 token
->type
= LEX_T_INTEGER
;
288 memset(&token
->value
, 0, sizeof token
->value
);
289 const char *start
= p
;
290 const char *end
= start
;
291 while (isalnum((unsigned char) *end
) || *end
== ':'
292 || (*end
== '.' && end
[1] != '.')) {
295 size_t len
= end
- start
;
301 lex_error(token
, "Integer constant expected.");
303 && ovs_scan(start
, ETH_ADDR_SCAN_FMT
"%n",
304 ETH_ADDR_SCAN_ARGS(mac
), &n
)
306 token
->value
.mac
= mac
;
307 token
->format
= LEX_F_ETHERNET
;
308 } else if (start
+ strspn(start
, "0123456789") == end
) {
309 if (p
[0] == '0' && len
> 1) {
310 lex_error(token
, "Decimal constants must not have leading zeros.");
312 unsigned long long int integer
;
316 integer
= strtoull(p
, &tail
, 10);
317 if (tail
!= end
|| errno
== ERANGE
) {
318 lex_error(token
, "Decimal constants must be less than 2**64.");
320 token
->value
.integer
= htonll(integer
);
321 token
->format
= LEX_F_DECIMAL
;
324 } else if (p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
326 lex_parse_hex_integer(start
+ 2, len
- 2, token
);
328 lex_error(token
, "Hex digits expected following 0%c.", p
[1]);
330 } else if (len
< INET6_ADDRSTRLEN
) {
331 char copy
[INET6_ADDRSTRLEN
];
332 memcpy(copy
, p
, len
);
336 struct in6_addr ipv6
;
337 if (inet_pton(AF_INET
, copy
, &ipv4
) == 1) {
338 token
->value
.ipv4
= ipv4
.s_addr
;
339 token
->format
= LEX_F_IPV4
;
340 } else if (inet_pton(AF_INET6
, copy
, &ipv6
) == 1) {
341 token
->value
.ipv6
= ipv6
;
342 token
->format
= LEX_F_IPV6
;
344 lex_error(token
, "Invalid numeric constant.");
347 lex_error(token
, "Invalid numeric constant.");
350 ovs_assert(token
->type
== LEX_T_INTEGER
|| token
->type
== LEX_T_ERROR
);
355 lex_parse_mask(const char *p
, struct lex_token
*token
)
357 struct lex_token mask
;
359 /* Parse just past the '/' as a second integer. Handle errors. */
360 p
= lex_parse_integer__(p
+ 1, &mask
);
361 if (mask
.type
== LEX_T_ERROR
) {
362 lex_token_swap(&mask
, token
);
363 lex_token_destroy(&mask
);
366 ovs_assert(mask
.type
== LEX_T_INTEGER
);
368 /* Now convert the value and mask into a masked integer token.
369 * We have a few special cases. */
370 token
->type
= LEX_T_MASKED_INTEGER
;
371 memset(&token
->mask
, 0, sizeof token
->mask
);
372 uint32_t prefix_bits
= ntohll(mask
.value
.integer
);
373 if (token
->format
== mask
.format
) {
374 /* Same format value and mask is always OK. */
375 token
->mask
= mask
.value
;
376 } else if (token
->format
== LEX_F_IPV4
377 && mask
.format
== LEX_F_DECIMAL
378 && prefix_bits
<= 32) {
379 /* IPv4 address with decimal mask is a CIDR prefix. */
380 token
->mask
.integer
= htonll(ntohl(be32_prefix_mask(prefix_bits
)));
381 } else if (token
->format
== LEX_F_IPV6
382 && mask
.format
== LEX_F_DECIMAL
383 && prefix_bits
<= 128) {
384 /* IPv6 address with decimal mask is a CIDR prefix. */
385 token
->mask
.ipv6
= ipv6_create_mask(prefix_bits
);
386 } else if (token
->format
== LEX_F_DECIMAL
387 && mask
.format
== LEX_F_HEXADECIMAL
388 && token
->value
.integer
== 0) {
389 /* Special case for e.g. 0/0x1234. */
390 token
->format
= LEX_F_HEXADECIMAL
;
391 token
->mask
= mask
.value
;
393 lex_error(token
, "Value and mask have incompatible formats.");
397 /* Check invariant that a 1-bit in the value corresponds to a 1-bit in the
398 * mask. */
399 for (int i
= 0; i
< ARRAY_SIZE(token
->mask
.be32
); i
++) {
400 ovs_be32 v
= token
->value
.be32
[i
];
401 ovs_be32 m
= token
->mask
.be32
[i
];
404 lex_error(token
, "Value contains unmasked 1-bits.");
410 lex_token_destroy(&mask
);
415 lex_parse_integer(const char *p
, struct lex_token
*token
)
417 p
= lex_parse_integer__(p
, token
);
418 if (token
->type
== LEX_T_INTEGER
&& *p
== '/') {
419 p
= lex_parse_mask(p
, token
);
425 lex_parse_string(const char *p
, struct lex_token
*token
)
427 const char *start
= ++p
;
431 lex_error(token
, "Input ends inside quoted string.");
435 token
->type
= (json_string_unescape(start
, p
- start
, &token
->s
)
436 ? LEX_T_STRING
: LEX_T_ERROR
);
454 lex_is_id1(unsigned char c
)
456 return ((c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z')
457 || c
== '_' || c
== '.');
461 lex_is_idn(unsigned char c
)
463 return lex_is_id1(c
) || (c
>= '0' && c
<= '9');
467 lex_parse_id(const char *p
, struct lex_token
*token
)
469 const char *start
= p
;
473 } while (lex_is_idn(*p
));
475 token
->type
= LEX_T_ID
;
476 token
->s
= xmemdup0(start
, p
- start
);
480 /* Initializes 'token' and parses the first token from the beginning of
481 * null-terminated string 'p' into 'token'. Stores a pointer to the start of
482 * the token (after skipping white space and comments, if any) into '*startp'.
483 * Returns the character position at which to begin parsing the next token. */
485 lex_token_parse(struct lex_token
*token
, const char *p
, const char **startp
)
487 lex_token_init(token
);
493 token
->type
= LEX_T_END
;
496 case ' ': case '\t': case '\n': case '\r':
505 } while (*p
!= '\0' && *p
!= '\n');
507 } else if (*p
== '*') {
510 if (*p
== '*' && p
[1] == '/') {
513 } else if (*p
== '\0' || *p
== '\n') {
514 lex_error(token
, "`/*' without matching `*/'.");
523 "`/' is only valid as part of `//' or `/*'.");
528 token
->type
= LEX_T_LPAREN
;
533 token
->type
= LEX_T_RPAREN
;
538 token
->type
= LEX_T_LCURLY
;
543 token
->type
= LEX_T_RCURLY
;
548 token
->type
= LEX_T_LSQUARE
;
553 token
->type
= LEX_T_RSQUARE
;
560 token
->type
= LEX_T_EQ
;
563 token
->type
= LEX_T_EQUALS
;
570 token
->type
= LEX_T_NE
;
573 token
->type
= LEX_T_LOG_NOT
;
580 token
->type
= LEX_T_LOG_AND
;
583 lex_error(token
, "`&' is only valid as part of `&&'.");
590 token
->type
= LEX_T_LOG_OR
;
593 lex_error(token
, "`|' is only valid as part of `||'.");
600 token
->type
= LEX_T_LE
;
603 token
->type
= LEX_T_LT
;
610 token
->type
= LEX_T_GE
;
613 token
->type
= LEX_T_GT
;
620 token
->type
= LEX_T_ELLIPSIS
;
623 lex_error(token
, "`.' is only valid as part of `..' or a number.");
629 token
->type
= LEX_T_COMMA
;
634 token
->type
= LEX_T_SEMICOLON
;
637 case '0': case '1': case '2': case '3': case '4':
638 case '5': case '6': case '7': case '8': case '9':
640 p
= lex_parse_integer(p
, token
);
644 p
= lex_parse_string(p
, token
);
647 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
648 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
649 /* We need to distinguish an Ethernet address or IPv6 address from an
650 * identifier. Fortunately, Ethernet addresses and IPv6 addresses that
651 * are ambiguous based on the first character always start with hex
652 * digits followed by a colon, but identifiers never do. */
653 p
= (p
[strspn(p
, "0123456789abcdefABCDEF")] == ':'
654 ? lex_parse_integer(p
, token
)
655 : lex_parse_id(p
, token
));
659 if (lex_is_id1(*p
)) {
660 p
= lex_parse_id(p
, token
);
662 if (isprint((unsigned char) *p
)) {
663 lex_error(token
, "Invalid character `%c' in input.", *p
);
665 lex_error(token
, "Invalid byte 0x%d in input.", *p
);
675 /* Initializes 'lexer' for parsing 'input'.
677 * While the lexer is in use, 'input' must remain available, but the caller
678 * otherwise retains ownership of 'input'.
680 * The caller must call lexer_get() to obtain the first token. */
682 lexer_init(struct lexer
*lexer
, const char *input
)
684 lexer
->input
= input
;
686 lex_token_init(&lexer
->token
);
689 /* Frees storage associated with 'lexer'. */
691 lexer_destroy(struct lexer
*lexer
)
693 lex_token_destroy(&lexer
->token
);
696 /* Obtains the next token from 'lexer' into 'lexer->token', and returns the
697 * token's type. The caller may examine 'lexer->token' directly to obtain full
698 * information about the token. */
700 lexer_get(struct lexer
*lexer
)
702 lex_token_destroy(&lexer
->token
);
703 lexer
->input
= lex_token_parse(&lexer
->token
, lexer
->input
, &lexer
->start
);
704 return lexer
->token
.type
;
707 /* Returns the type of the next token that will be fetched by lexer_get(),
708 * without advancing 'lexer->token' to that token. */
710 lexer_lookahead(const struct lexer
*lexer
)
712 struct lex_token next
;
716 lex_token_parse(&next
, lexer
->input
, &start
);
718 lex_token_destroy(&next
);
722 /* If 'lexer''s current token has the given 'type', advances 'lexer' to the
723 * next token and returns true. Otherwise returns false. */
725 lexer_match(struct lexer
*lexer
, enum lex_type type
)
727 if (lexer
->token
.type
== type
) {
735 /* If 'lexer''s current token is the identifier given in 'id', advances 'lexer'
736 * to the next token and returns true. Otherwise returns false. */
738 lexer_match_id(struct lexer
*lexer
, const char *id
)
740 if (lexer
->token
.type
== LEX_T_ID
&& !strcmp(lexer
->token
.s
, id
)) {