]>
git.proxmox.com Git - ovs.git/blob - ovn/lib/lex.c
/*
 * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
21 #include "openvswitch/dynamic-string.h"
22 #include "openvswitch/json.h"
27 /* Returns a string that represents 'format'. */
29 lex_format_to_string(enum lex_format format
)
34 case LEX_F_HEXADECIMAL
:
47 /* Initializes 'token'. */
49 lex_token_init(struct lex_token
*token
)
51 token
->type
= LEX_T_END
;
55 /* Frees memory owned by 'token'. */
57 lex_token_destroy(struct lex_token
*token
)
59 if (token
->s
!= token
->buffer
) {
65 /* Exchanges 'a' and 'b'. */
67 lex_token_swap(struct lex_token
*a
, struct lex_token
*b
)
69 struct lex_token tmp
= *a
;
/* A token whose 's' pointed into its own 'buffer' before the swap now has an
 * 's' that points into the other token's 'buffer'.  Redirect it to the
 * 'buffer' that actually holds the copied bytes. */
75 if (a
->s
== b
->buffer
) {
78 if (b
->s
== a
->buffer
) {
83 /* The string 's' need not be null-terminated at 'length'. */
85 lex_token_strcpy(struct lex_token
*token
, const char *s
, size_t length
)
87 lex_token_destroy(token
);
88 token
->s
= (length
+ 1 <= sizeof token
->buffer
90 : xmalloc(length
+ 1));
91 memcpy(token
->s
, s
, length
);
92 token
->s
[length
] = '\0';
96 lex_token_strset(struct lex_token
*token
, char *s
)
98 lex_token_destroy(token
);
103 lex_token_vsprintf(struct lex_token
*token
, const char *format
, va_list args
)
105 lex_token_destroy(token
);
108 va_copy(args2
, args
);
109 token
->s
= (vsnprintf(token
->buffer
, sizeof token
->buffer
, format
, args
)
110 < sizeof token
->buffer
112 : xvasprintf(format
, args2
));
116 /* lex_token_format(). */
119 lex_token_n_zeros(enum lex_format format
)
122 case LEX_F_DECIMAL
: return offsetof(union mf_subvalue
, integer
);
123 case LEX_F_HEXADECIMAL
: return 0;
124 case LEX_F_IPV4
: return offsetof(union mf_subvalue
, ipv4
);
125 case LEX_F_IPV6
: return offsetof(union mf_subvalue
, ipv6
);
126 case LEX_F_ETHERNET
: return offsetof(union mf_subvalue
, mac
);
127 default: OVS_NOT_REACHED();
131 /* Returns the effective format for 'token', that is, the format in which it
132 * should actually be printed. This is ordinarily the same as 'token->format',
133 * but it's always possible that someone sets up a token with a format that
134 * won't work for a value, e.g. 'token->value' is wider than 32 bits but the
135 * format is LEX_F_IPV4. (The lexer itself won't do that; this is an attempt
136 * to avoid confusion in the future.) */
137 static enum lex_format
138 lex_token_get_format(const struct lex_token
*token
)
140 size_t n_zeros
= lex_token_n_zeros(token
->format
);
141 return (is_all_zeros(&token
->value
, n_zeros
)
142 && (token
->type
!= LEX_T_MASKED_INTEGER
143 || is_all_zeros(&token
->mask
, n_zeros
))
145 : LEX_F_HEXADECIMAL
);
149 lex_token_format_value(const union mf_subvalue
*value
,
150 enum lex_format format
, struct ds
*s
)
154 ds_put_format(s
, "%"PRIu64
, ntohll(value
->integer
));
157 case LEX_F_HEXADECIMAL
:
158 mf_format_subvalue(value
, s
);
162 ds_put_format(s
, IP_FMT
, IP_ARGS(value
->ipv4
));
166 ipv6_format_addr(&value
->ipv6
, s
);
170 ds_put_format(s
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(value
->mac
));
180 lex_token_format_masked_integer(const struct lex_token
*token
, struct ds
*s
)
182 enum lex_format format
= lex_token_get_format(token
);
184 lex_token_format_value(&token
->value
, format
, s
);
187 const union mf_subvalue
*mask
= &token
->mask
;
188 if (format
== LEX_F_IPV4
&& ip_is_cidr(mask
->ipv4
)) {
189 ds_put_format(s
, "%d", ip_count_cidr_bits(mask
->ipv4
));
190 } else if (token
->format
== LEX_F_IPV6
&& ipv6_is_cidr(&mask
->ipv6
)) {
191 ds_put_format(s
, "%d", ipv6_count_cidr_bits(&mask
->ipv6
));
193 lex_token_format_value(&token
->mask
, format
, s
);
/* Appends a string representation of 'token' to 's', in a format that can be
 * losslessly parsed back by the lexer.  (LEX_T_END and LEX_T_ERROR can't be
 * parsed back.) */
201 lex_token_format(const struct lex_token
*token
, struct ds
*s
)
203 switch (token
->type
) {
209 ds_put_cstr(s
, token
->s
);
213 ds_put_cstr(s
, "error(");
214 json_string_escape(token
->s
, s
);
219 json_string_escape(token
->s
, s
);
223 lex_token_format_value(&token
->value
, lex_token_get_format(token
), s
);
226 case LEX_T_MASKED_INTEGER
:
227 lex_token_format_masked_integer(token
, s
);
231 ds_put_format(s
, "$%s", token
->s
);
253 ds_put_cstr(s
, "==");
256 ds_put_cstr(s
, "!=");
262 ds_put_cstr(s
, "<=");
268 ds_put_cstr(s
, ">=");
274 ds_put_cstr(s
, "&&");
277 ds_put_cstr(s
, "||");
280 ds_put_cstr(s
, "..");
285 case LEX_T_SEMICOLON
:
292 ds_put_cstr(s
, "<->");
294 case LEX_T_DECREMENT
:
295 ds_put_cstr(s
, "--");
306 /* lex_token_parse(). */
308 static void OVS_PRINTF_FORMAT(2, 3)
309 lex_error(struct lex_token
*token
, const char *message
, ...)
311 ovs_assert(!token
->s
);
312 token
->type
= LEX_T_ERROR
;
315 va_start(args
, message
);
316 lex_token_vsprintf(token
, message
, args
);
321 lex_parse_hex_integer(const char *start
, size_t len
, struct lex_token
*token
)
323 const char *in
= start
+ (len
- 1);
324 uint8_t *out
= token
->value
.u8
+ (sizeof token
->value
.u8
- 1);
326 for (int i
= 0; i
< len
; i
++) {
327 int hexit
= hexit_value(in
[-i
]);
329 lex_error(token
, "Invalid syntax in hexadecimal constant.");
332 if (hexit
&& i
/ 2 >= sizeof token
->value
.u8
) {
333 lex_error(token
, "Hexadecimal constant requires more than "
334 "%"PRIuSIZE
" bits.", 8 * sizeof token
->value
.u8
);
337 out
[-(i
/ 2)] |= i
% 2 ? hexit
<< 4 : hexit
;
339 token
->format
= LEX_F_HEXADECIMAL
;
343 lex_parse_integer__(const char *p
, struct lex_token
*token
)
345 lex_token_init(token
);
346 token
->type
= LEX_T_INTEGER
;
347 memset(&token
->value
, 0, sizeof token
->value
);
/* Find the extent of an "integer" token, which can be in decimal or
 * hexadecimal, or an Ethernet address or IPv4 or IPv6 address, as 'start'
 * through 'end'.
 *
 * Special cases we handle here are:
 *
 *   - The ellipsis token "..", used as e.g. 123..456.  A doubled dot
 *     is never valid syntax as part of an "integer", so we stop if
 *     we encounter two dots in a row.
 *
 *   - Syntax like 1.2.3.4:1234 to indicate an IPv4 address followed by a
 *     port number should be considered three tokens: 1.2.3.4 : 1234.
 *     The obvious approach is to allow just dots or just colons within a
 *     given integer, but that would disallow IPv4-mapped IPv6 addresses,
 *     e.g. ::ffff:192.0.2.128.  However, even in those addresses, a
 *     colon never follows a dot, so we stop if we encounter a colon
 *     after a dot.
 *
 *     (There is no corresponding way to parse an IPv6 address followed
 *     by a port number: ::1:2:3:4:1234 is unavoidably ambiguous.)
 */
370 const char *start
= p
;
371 const char *end
= start
;
372 bool saw_dot
= false;
373 while (isalnum((unsigned char) *end
)
374 || (*end
== ':' && !saw_dot
)
375 || (*end
== '.' && end
[1] != '.')) {
381 size_t len
= end
- start
;
387 lex_error(token
, "Integer constant expected.");
389 && ovs_scan(start
, ETH_ADDR_SCAN_FMT
"%n",
390 ETH_ADDR_SCAN_ARGS(mac
), &n
)
392 token
->value
.mac
= mac
;
393 token
->format
= LEX_F_ETHERNET
;
394 } else if (start
+ strspn(start
, "0123456789") == end
) {
395 if (p
[0] == '0' && len
> 1) {
396 lex_error(token
, "Decimal constants must not have leading zeros.");
398 unsigned long long int integer
;
402 integer
= strtoull(p
, &tail
, 10);
403 if (tail
!= end
|| errno
== ERANGE
) {
404 lex_error(token
, "Decimal constants must be less than 2**64.");
406 token
->value
.integer
= htonll(integer
);
407 token
->format
= LEX_F_DECIMAL
;
410 } else if (p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
412 lex_parse_hex_integer(start
+ 2, len
- 2, token
);
414 lex_error(token
, "Hex digits expected following 0%c.", p
[1]);
416 } else if (len
< INET6_ADDRSTRLEN
) {
417 char copy
[INET6_ADDRSTRLEN
];
418 memcpy(copy
, p
, len
);
421 if (ip_parse(copy
, &token
->value
.ipv4
)) {
422 token
->format
= LEX_F_IPV4
;
423 } else if (ipv6_parse(copy
, &token
->value
.ipv6
)) {
424 token
->format
= LEX_F_IPV6
;
426 lex_error(token
, "Invalid numeric constant.");
429 lex_error(token
, "Invalid numeric constant.");
432 ovs_assert(token
->type
== LEX_T_INTEGER
|| token
->type
== LEX_T_ERROR
);
437 lex_parse_mask(const char *p
, struct lex_token
*token
)
439 struct lex_token mask
;
441 /* Parse just past the '/' as a second integer. Handle errors. */
442 p
= lex_parse_integer__(p
+ 1, &mask
);
443 if (mask
.type
== LEX_T_ERROR
) {
444 lex_token_swap(&mask
, token
);
445 lex_token_destroy(&mask
);
448 ovs_assert(mask
.type
== LEX_T_INTEGER
);
450 /* Now convert the value and mask into a masked integer token.
451 * We have a few special cases. */
452 token
->type
= LEX_T_MASKED_INTEGER
;
453 memset(&token
->mask
, 0, sizeof token
->mask
);
454 uint32_t prefix_bits
= ntohll(mask
.value
.integer
);
455 if (token
->format
== mask
.format
) {
456 /* Same format value and mask is always OK. */
457 token
->mask
= mask
.value
;
458 } else if (token
->format
== LEX_F_IPV4
459 && mask
.format
== LEX_F_DECIMAL
460 && prefix_bits
<= 32) {
461 /* IPv4 address with decimal mask is a CIDR prefix. */
462 token
->mask
.integer
= htonll(ntohl(be32_prefix_mask(prefix_bits
)));
463 } else if (token
->format
== LEX_F_IPV6
464 && mask
.format
== LEX_F_DECIMAL
465 && prefix_bits
<= 128) {
466 /* IPv6 address with decimal mask is a CIDR prefix. */
467 token
->mask
.ipv6
= ipv6_create_mask(prefix_bits
);
468 } else if (token
->format
== LEX_F_DECIMAL
469 && mask
.format
== LEX_F_HEXADECIMAL
470 && token
->value
.integer
== 0) {
471 /* Special case for e.g. 0/0x1234. */
472 token
->format
= LEX_F_HEXADECIMAL
;
473 token
->mask
= mask
.value
;
475 lex_error(token
, "Value and mask have incompatible formats.");
/* Check invariant that a 1-bit in the value corresponds to a 1-bit in the
 * mask. */
481 for (int i
= 0; i
< ARRAY_SIZE(token
->mask
.be32
); i
++) {
482 ovs_be32 v
= token
->value
.be32
[i
];
483 ovs_be32 m
= token
->mask
.be32
[i
];
486 lex_error(token
, "Value contains unmasked 1-bits.");
492 lex_token_destroy(&mask
);
497 lex_parse_integer(const char *p
, struct lex_token
*token
)
499 p
= lex_parse_integer__(p
, token
);
500 if (token
->type
== LEX_T_INTEGER
&& *p
== '/') {
501 p
= lex_parse_mask(p
, token
);
507 lex_parse_string(const char *p
, struct lex_token
*token
)
509 const char *start
= ++p
;
514 lex_error(token
, "Input ends inside quoted string.");
518 token
->type
= (json_string_unescape(start
, p
- start
, &s
)
519 ? LEX_T_STRING
: LEX_T_ERROR
);
520 lex_token_strset(token
, s
);
/* Returns true if 'c' may be the first character of an identifier: an ASCII
 * letter, '_', or '.'.
 *
 * Explicit range comparisons are used, so the result does not depend on the
 * current locale. */
static bool
lex_is_id1(unsigned char c)
{
    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
            || c == '_' || c == '.');
}
/* Returns true if 'c' may appear after the first character of an identifier:
 * anything lex_is_id1() accepts, or an ASCII digit. */
static bool
lex_is_idn(unsigned char c)
{
    return lex_is_id1(c) || (c >= '0' && c <= '9');
}
551 lex_parse_id(const char *p
, enum lex_type type
, struct lex_token
*token
)
553 const char *start
= p
;
557 } while (lex_is_idn(*p
));
560 lex_token_strcpy(token
, start
, p
- start
);
565 lex_parse_addr_set(const char *p
, struct lex_token
*token
)
568 if (!lex_is_id1(*p
)) {
569 lex_error(token
, "`$' must be followed by a valid identifier.");
573 return lex_parse_id(p
, LEX_T_MACRO
, token
);
576 /* Initializes 'token' and parses the first token from the beginning of
577 * null-terminated string 'p' into 'token'. Stores a pointer to the start of
578 * the token (after skipping white space and comments, if any) into '*startp'.
579 * Returns the character position at which to begin parsing the next token. */
581 lex_token_parse(struct lex_token
*token
, const char *p
, const char **startp
)
583 lex_token_init(token
);
589 token
->type
= LEX_T_END
;
592 case ' ': case '\t': case '\n': case '\r': case '\v': case '\f':
601 } while (*p
!= '\0' && *p
!= '\n');
603 } else if (*p
== '*') {
606 if (*p
== '*' && p
[1] == '/') {
609 } else if (*p
== '\0' || *p
== '\n') {
610 lex_error(token
, "`/*' without matching `*/'.");
619 "`/' is only valid as part of `//' or `/*'.");
624 token
->type
= LEX_T_LPAREN
;
629 token
->type
= LEX_T_RPAREN
;
634 token
->type
= LEX_T_LCURLY
;
639 token
->type
= LEX_T_RCURLY
;
644 token
->type
= LEX_T_LSQUARE
;
649 token
->type
= LEX_T_RSQUARE
;
656 token
->type
= LEX_T_EQ
;
659 token
->type
= LEX_T_EQUALS
;
666 token
->type
= LEX_T_NE
;
669 token
->type
= LEX_T_LOG_NOT
;
676 token
->type
= LEX_T_LOG_AND
;
679 lex_error(token
, "`&' is only valid as part of `&&'.");
686 token
->type
= LEX_T_LOG_OR
;
689 lex_error(token
, "`|' is only valid as part of `||'.");
696 token
->type
= LEX_T_LE
;
698 } else if (*p
== '-' && p
[1] == '>') {
699 token
->type
= LEX_T_EXCHANGE
;
702 token
->type
= LEX_T_LT
;
709 token
->type
= LEX_T_GE
;
712 token
->type
= LEX_T_GT
;
719 token
->type
= LEX_T_ELLIPSIS
;
722 lex_error(token
, "`.' is only valid as part of `..' or a number.");
728 token
->type
= LEX_T_COMMA
;
733 token
->type
= LEX_T_SEMICOLON
;
739 token
->type
= LEX_T_DECREMENT
;
742 lex_error(token
, "`-' is only valid as part of `--'.");
747 p
= lex_parse_addr_set(p
, token
);
752 token
->type
= LEX_T_COLON
;
756 /* IPv6 address beginning with "::". */
758 case '0': case '1': case '2': case '3': case '4':
759 case '5': case '6': case '7': case '8': case '9':
760 p
= lex_parse_integer(p
, token
);
764 p
= lex_parse_string(p
, token
);
767 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
768 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
769 /* We need to distinguish an Ethernet address or IPv6 address from an
770 * identifier. Fortunately, Ethernet addresses and IPv6 addresses that
771 * are ambiguous based on the first character, always start with hex
772 * digits followed by a colon, but identifiers never do. */
773 p
= (p
[strspn(p
, "0123456789abcdefABCDEF")] == ':'
774 ? lex_parse_integer(p
, token
)
775 : lex_parse_id(p
, LEX_T_ID
, token
));
779 if (lex_is_id1(*p
)) {
780 p
= lex_parse_id(p
, LEX_T_ID
, token
);
782 if (isprint((unsigned char) *p
)) {
783 lex_error(token
, "Invalid character `%c' in input.", *p
);
785 lex_error(token
, "Invalid byte 0x%d in input.", *p
);
795 /* Initializes 'lexer' for parsing 'input'.
797 * While the lexer is in use, 'input' must remain available, but the caller
798 * otherwise retains ownership of 'input'.
800 * The caller must call lexer_get() to obtain the first token. */
802 lexer_init(struct lexer
*lexer
, const char *input
)
804 lexer
->input
= input
;
806 lex_token_init(&lexer
->token
);
810 /* Frees storage associated with 'lexer'. */
812 lexer_destroy(struct lexer
*lexer
)
814 lex_token_destroy(&lexer
->token
);
818 /* Obtains the next token from 'lexer' into 'lexer->token', and returns the
819 * token's type. The caller may examine 'lexer->token' directly to obtain full
820 * information about the token. */
822 lexer_get(struct lexer
*lexer
)
824 lex_token_destroy(&lexer
->token
);
825 lexer
->input
= lex_token_parse(&lexer
->token
, lexer
->input
, &lexer
->start
);
826 return lexer
->token
.type
;
829 /* Returns the type of the next token that will be fetched by lexer_get(),
830 * without advancing 'lexer->token' to that token. */
832 lexer_lookahead(const struct lexer
*lexer
)
834 struct lex_token next
;
838 lex_token_parse(&next
, lexer
->input
, &start
);
840 lex_token_destroy(&next
);
844 /* If 'lexer''s current token has the given 'type', advances 'lexer' to the
845 * next token and returns true. Otherwise returns false. */
847 lexer_match(struct lexer
*lexer
, enum lex_type type
)
849 if (lexer
->token
.type
== type
) {
858 lexer_force_match(struct lexer
*lexer
, enum lex_type t
)
860 if (t
== LEX_T_END
) {
861 return lexer_force_end(lexer
);
862 } else if (lexer_match(lexer
, t
)) {
865 struct lex_token token
= { .type
= t
};
866 struct ds s
= DS_EMPTY_INITIALIZER
;
867 lex_token_format(&token
, &s
);
869 lexer_syntax_error(lexer
, "expecting `%s'", ds_cstr(&s
));
877 /* If 'lexer''s current token is the identifier given in 'id', advances 'lexer'
878 * to the next token and returns true. Otherwise returns false. */
880 lexer_match_id(struct lexer
*lexer
, const char *id
)
882 if (lexer
->token
.type
== LEX_T_ID
&& !strcmp(lexer
->token
.s
, id
)) {
891 lexer_is_int(const struct lexer
*lexer
)
893 return (lexer
->token
.type
== LEX_T_INTEGER
894 && lexer
->token
.format
== LEX_F_DECIMAL
895 && ntohll(lexer
->token
.value
.integer
) <= INT_MAX
);
899 lexer_get_int(struct lexer
*lexer
, int *value
)
901 if (lexer_is_int(lexer
)) {
902 *value
= ntohll(lexer
->token
.value
.integer
);
912 lexer_force_int(struct lexer
*lexer
, int *value
)
914 bool ok
= lexer_get_int(lexer
, value
);
916 lexer_syntax_error(lexer
, "expecting small integer");
922 lexer_force_end(struct lexer
*lexer
)
924 if (lexer
->token
.type
== LEX_T_END
) {
927 lexer_syntax_error(lexer
, "expecting end of input");
933 lexer_error_handle_common(struct lexer
*lexer
)
936 /* Already have an error, suppress this one since the cascade seems
937 * unlikely to be useful. */
939 } else if (lexer
->token
.type
== LEX_T_ERROR
) {
940 /* The lexer signaled an error. Nothing at a higher level accepts an
941 * error token, so we'll inevitably end up here with some meaningless
942 * parse error. Report the lexical error instead. */
943 lexer
->error
= xstrdup(lexer
->token
.s
);
950 void OVS_PRINTF_FORMAT(2, 3)
951 lexer_error(struct lexer
*lexer
, const char *message
, ...)
953 if (lexer_error_handle_common(lexer
)) {
958 va_start(args
, message
);
959 lexer
->error
= xvasprintf(message
, args
);
963 void OVS_PRINTF_FORMAT(2, 3)
964 lexer_syntax_error(struct lexer
*lexer
, const char *message
, ...)
966 if (lexer_error_handle_common(lexer
)) {
973 ds_put_cstr(&s
, "Syntax error");
974 if (lexer
->token
.type
== LEX_T_END
) {
975 ds_put_cstr(&s
, " at end of input");
976 } else if (lexer
->start
) {
977 ds_put_format(&s
, " at `%.*s'",
978 (int) (lexer
->input
- lexer
->start
),
983 ds_put_char(&s
, ' ');
986 va_start(args
, message
);
987 ds_put_format_valist(&s
, message
, args
);
990 ds_put_char(&s
, '.');
992 lexer
->error
= ds_steal_cstr(&s
);
996 lexer_steal_error(struct lexer
*lexer
)
998 char *error
= lexer
->error
;