]> git.proxmox.com Git - ovs.git/blame - ovn/lib/lex.c
Use ip_parse() and ipv6_parse() and variants in more places.
[ovs.git] / ovn / lib / lex.c
CommitLineData
10b1662b
BP
1/*
2 * Copyright (c) 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "lex.h"
19#include <ctype.h>
20#include <errno.h>
21#include <stdarg.h>
22#include "dynamic-string.h"
23#include "json.h"
e7695092 24#include "packets.h"
10b1662b 25#include "util.h"
363b5330
BP
26\f
27/* Returns a string that represents 'format'. */
28const char *
29lex_format_to_string(enum lex_format format)
30{
31 switch (format) {
32 case LEX_F_DECIMAL:
33 return "decimal";
34 case LEX_F_HEXADECIMAL:
35 return "hexadecimal";
36 case LEX_F_IPV4:
37 return "IPv4";
38 case LEX_F_IPV6:
39 return "IPv6";
40 case LEX_F_ETHERNET:
41 return "Ethernet";
42 default:
43 abort();
44 }
45}
46\f
10b1662b
BP
47/* Initializes 'token'. */
48void
49lex_token_init(struct lex_token *token)
50{
51 token->type = LEX_T_END;
52 token->s = NULL;
53}
54
55/* Frees memory owned by 'token'. */
56void
57lex_token_destroy(struct lex_token *token)
58{
59 free(token->s);
60}
61
62/* Exchanges 'a' and 'b'. */
63void
64lex_token_swap(struct lex_token *a, struct lex_token *b)
65{
66 struct lex_token tmp = *a;
67 *a = *b;
68 *b = tmp;
69}
70\f
71/* lex_token_format(). */
72
73static size_t
74lex_token_n_zeros(enum lex_format format)
75{
76 switch (format) {
77 case LEX_F_DECIMAL: return offsetof(union mf_subvalue, integer);
78 case LEX_F_HEXADECIMAL: return 0;
79 case LEX_F_IPV4: return offsetof(union mf_subvalue, ipv4);
80 case LEX_F_IPV6: return offsetof(union mf_subvalue, ipv6);
81 case LEX_F_ETHERNET: return offsetof(union mf_subvalue, mac);
82 default: OVS_NOT_REACHED();
83 }
84}
85
86/* Returns the effective format for 'token', that is, the format in which it
87 * should actually be printed. This is ordinarily the same as 'token->format',
88 * but it's always possible that someone sets up a token with a format that
89 * won't work for a value, e.g. 'token->value' is wider than 32 bits but the
90 * format is LEX_F_IPV4. (The lexer itself won't do that; this is an attempt
91 * to avoid confusion in the future.) */
92static enum lex_format
93lex_token_get_format(const struct lex_token *token)
94{
95 size_t n_zeros = lex_token_n_zeros(token->format);
96 return (is_all_zeros(&token->value, n_zeros)
97 && (token->type != LEX_T_MASKED_INTEGER
98 || is_all_zeros(&token->mask, n_zeros))
99 ? token->format
100 : LEX_F_HEXADECIMAL);
101}
102
103static void
104lex_token_format_value(const union mf_subvalue *value,
105 enum lex_format format, struct ds *s)
106{
107 switch (format) {
108 case LEX_F_DECIMAL:
109 ds_put_format(s, "%"PRIu64, ntohll(value->integer));
110 break;
111
112 case LEX_F_HEXADECIMAL:
113 mf_format_subvalue(value, s);
114 break;
115
116 case LEX_F_IPV4:
117 ds_put_format(s, IP_FMT, IP_ARGS(value->ipv4));
118 break;
119
120 case LEX_F_IPV6:
ac6d120f 121 ipv6_format_addr(&value->ipv6, s);
10b1662b
BP
122 break;
123
124 case LEX_F_ETHERNET:
125 ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(value->mac));
126 break;
127
128 default:
129 OVS_NOT_REACHED();
130 }
131
132}
133
134static void
135lex_token_format_masked_integer(const struct lex_token *token, struct ds *s)
136{
137 enum lex_format format = lex_token_get_format(token);
138
139 lex_token_format_value(&token->value, format, s);
140 ds_put_char(s, '/');
141
142 const union mf_subvalue *mask = &token->mask;
143 if (format == LEX_F_IPV4 && ip_is_cidr(mask->ipv4)) {
144 ds_put_format(s, "%d", ip_count_cidr_bits(mask->ipv4));
145 } else if (token->format == LEX_F_IPV6 && ipv6_is_cidr(&mask->ipv6)) {
146 ds_put_format(s, "%d", ipv6_count_cidr_bits(&mask->ipv6));
147 } else {
148 lex_token_format_value(&token->mask, format, s);
149 }
150}
151
10b1662b
BP
152/* Appends a string representation of 'token' to 's', in a format that can be
153 * losslessly parsed back by the lexer. (LEX_T_END and LEX_T_ERROR can't be
154 * parsed back.) */
155void
3d611299 156lex_token_format(const struct lex_token *token, struct ds *s)
10b1662b
BP
157{
158 switch (token->type) {
159 case LEX_T_END:
160 ds_put_cstr(s, "$");
161 break;
162
163 case LEX_T_ID:
164 ds_put_cstr(s, token->s);
165 break;
166
167 case LEX_T_ERROR:
168 ds_put_cstr(s, "error(");
3b626771 169 json_string_escape(token->s, s);
10b1662b
BP
170 ds_put_char(s, ')');
171 break;
172
173 case LEX_T_STRING:
3b626771 174 json_string_escape(token->s, s);
10b1662b
BP
175 break;
176
177 case LEX_T_INTEGER:
178 lex_token_format_value(&token->value, lex_token_get_format(token), s);
179 break;
180
181 case LEX_T_MASKED_INTEGER:
182 lex_token_format_masked_integer(token, s);
183 break;
184
185 case LEX_T_LPAREN:
186 ds_put_cstr(s, "(");
187 break;
188 case LEX_T_RPAREN:
189 ds_put_cstr(s, ")");
190 break;
191 case LEX_T_LCURLY:
192 ds_put_cstr(s, "{");
193 break;
194 case LEX_T_RCURLY:
195 ds_put_cstr(s, "}");
196 break;
197 case LEX_T_LSQUARE:
198 ds_put_cstr(s, "[");
199 break;
200 case LEX_T_RSQUARE:
201 ds_put_cstr(s, "]");
202 break;
203 case LEX_T_EQ:
204 ds_put_cstr(s, "==");
205 break;
206 case LEX_T_NE:
207 ds_put_cstr(s, "!=");
208 break;
209 case LEX_T_LT:
210 ds_put_cstr(s, "<");
211 break;
212 case LEX_T_LE:
213 ds_put_cstr(s, "<=");
214 break;
215 case LEX_T_GT:
216 ds_put_cstr(s, ">");
217 break;
218 case LEX_T_GE:
219 ds_put_cstr(s, ">=");
220 break;
221 case LEX_T_LOG_NOT:
222 ds_put_cstr(s, "!");
223 break;
224 case LEX_T_LOG_AND:
225 ds_put_cstr(s, "&&");
226 break;
227 case LEX_T_LOG_OR:
228 ds_put_cstr(s, "||");
229 break;
230 case LEX_T_ELLIPSIS:
231 ds_put_cstr(s, "..");
232 break;
233 case LEX_T_COMMA:
234 ds_put_cstr(s, ",");
235 break;
236 case LEX_T_SEMICOLON:
237 ds_put_cstr(s, ";");
238 break;
239 case LEX_T_EQUALS:
240 ds_put_cstr(s, "=");
241 break;
a20c96c6
BP
242 case LEX_T_EXCHANGE:
243 ds_put_cstr(s, "<->");
244 break;
56091efe
BP
245 case LEX_T_DECREMENT:
246 ds_put_cstr(s, "--");
247 break;
10b1662b
BP
248 default:
249 OVS_NOT_REACHED();
250 }
251
252}
253\f
254/* lex_token_parse(). */
255
256static void OVS_PRINTF_FORMAT(2, 3)
257lex_error(struct lex_token *token, const char *message, ...)
258{
259 ovs_assert(!token->s);
260 token->type = LEX_T_ERROR;
261
262 va_list args;
263 va_start(args, message);
264 token->s = xvasprintf(message, args);
265 va_end(args);
266}
267
268static void
269lex_parse_hex_integer(const char *start, size_t len, struct lex_token *token)
270{
271 const char *in = start + (len - 1);
272 uint8_t *out = token->value.u8 + (sizeof token->value.u8 - 1);
273
274 for (int i = 0; i < len; i++) {
275 int hexit = hexit_value(in[-i]);
276 if (hexit < 0) {
277 lex_error(token, "Invalid syntax in hexadecimal constant.");
278 return;
279 }
280 if (hexit && i / 2 >= sizeof token->value.u8) {
281 lex_error(token, "Hexadecimal constant requires more than "
282 "%"PRIuSIZE" bits.", 8 * sizeof token->value.u8);
283 return;
284 }
285 out[-(i / 2)] |= i % 2 ? hexit << 4 : hexit;
286 }
287 token->format = LEX_F_HEXADECIMAL;
288}
289
290static const char *
291lex_parse_integer__(const char *p, struct lex_token *token)
292{
293 lex_token_init(token);
294 token->type = LEX_T_INTEGER;
295 memset(&token->value, 0, sizeof token->value);
296 const char *start = p;
297 const char *end = start;
298 while (isalnum((unsigned char) *end) || *end == ':'
299 || (*end == '.' && end[1] != '.')) {
300 end++;
301 }
302 size_t len = end - start;
303
304 int n;
74ff3298 305 struct eth_addr mac;
10b1662b
BP
306
307 if (!len) {
308 lex_error(token, "Integer constant expected.");
309 } else if (len == 17
310 && ovs_scan(start, ETH_ADDR_SCAN_FMT"%n",
311 ETH_ADDR_SCAN_ARGS(mac), &n)
312 && n == len) {
74ff3298 313 token->value.mac = mac;
10b1662b
BP
314 token->format = LEX_F_ETHERNET;
315 } else if (start + strspn(start, "0123456789") == end) {
316 if (p[0] == '0' && len > 1) {
317 lex_error(token, "Decimal constants must not have leading zeros.");
318 } else {
319 unsigned long long int integer;
320 char *tail;
321
322 errno = 0;
323 integer = strtoull(p, &tail, 10);
324 if (tail != end || errno == ERANGE) {
325 lex_error(token, "Decimal constants must be less than 2**64.");
326 } else {
327 token->value.integer = htonll(integer);
328 token->format = LEX_F_DECIMAL;
329 }
330 }
331 } else if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
332 if (len > 2) {
333 lex_parse_hex_integer(start + 2, len - 2, token);
334 } else {
335 lex_error(token, "Hex digits expected following 0%c.", p[1]);
336 }
337 } else if (len < INET6_ADDRSTRLEN) {
338 char copy[INET6_ADDRSTRLEN];
339 memcpy(copy, p, len);
340 copy[len] = '\0';
341
e7695092 342 if (ip_parse(copy, &token->value.ipv4)) {
10b1662b 343 token->format = LEX_F_IPV4;
e7695092 344 } else if (ipv6_parse(copy, &token->value.ipv6)) {
10b1662b
BP
345 token->format = LEX_F_IPV6;
346 } else {
347 lex_error(token, "Invalid numeric constant.");
348 }
349 } else {
350 lex_error(token, "Invalid numeric constant.");
351 }
352
353 ovs_assert(token->type == LEX_T_INTEGER || token->type == LEX_T_ERROR);
354 return end;
355}
356
357static const char *
358lex_parse_mask(const char *p, struct lex_token *token)
359{
360 struct lex_token mask;
361
362 /* Parse just past the '/' as a second integer. Handle errors. */
363 p = lex_parse_integer__(p + 1, &mask);
364 if (mask.type == LEX_T_ERROR) {
365 lex_token_swap(&mask, token);
366 lex_token_destroy(&mask);
367 return p;
368 }
369 ovs_assert(mask.type == LEX_T_INTEGER);
370
371 /* Now convert the value and mask into a masked integer token.
372 * We have a few special cases. */
373 token->type = LEX_T_MASKED_INTEGER;
374 memset(&token->mask, 0, sizeof token->mask);
375 uint32_t prefix_bits = ntohll(mask.value.integer);
376 if (token->format == mask.format) {
377 /* Same format value and mask is always OK. */
378 token->mask = mask.value;
379 } else if (token->format == LEX_F_IPV4
380 && mask.format == LEX_F_DECIMAL
381 && prefix_bits <= 32) {
382 /* IPv4 address with decimal mask is a CIDR prefix. */
383 token->mask.integer = htonll(ntohl(be32_prefix_mask(prefix_bits)));
384 } else if (token->format == LEX_F_IPV6
385 && mask.format == LEX_F_DECIMAL
386 && prefix_bits <= 128) {
387 /* IPv6 address with decimal mask is a CIDR prefix. */
388 token->mask.ipv6 = ipv6_create_mask(prefix_bits);
389 } else if (token->format == LEX_F_DECIMAL
390 && mask.format == LEX_F_HEXADECIMAL
391 && token->value.integer == 0) {
392 /* Special case for e.g. 0/0x1234. */
393 token->format = LEX_F_HEXADECIMAL;
394 token->mask = mask.value;
395 } else {
396 lex_error(token, "Value and mask have incompatible formats.");
397 return p;
398 }
399
400 /* Check invariant that a 1-bit in the value corresponds to a 1-bit in the
401 * mask. */
402 for (int i = 0; i < ARRAY_SIZE(token->mask.be32); i++) {
403 ovs_be32 v = token->value.be32[i];
404 ovs_be32 m = token->mask.be32[i];
405
406 if (v & ~m) {
407 lex_error(token, "Value contains unmasked 1-bits.");
408 break;
409 }
410 }
411
412 /* Done! */
413 lex_token_destroy(&mask);
414 return p;
415}
416
417static const char *
418lex_parse_integer(const char *p, struct lex_token *token)
419{
420 p = lex_parse_integer__(p, token);
421 if (token->type == LEX_T_INTEGER && *p == '/') {
422 p = lex_parse_mask(p, token);
423 }
424 return p;
425}
426
427static const char *
428lex_parse_string(const char *p, struct lex_token *token)
429{
430 const char *start = ++p;
431 for (;;) {
432 switch (*p) {
433 case '\0':
434 lex_error(token, "Input ends inside quoted string.");
435 return p;
436
437 case '"':
438 token->type = (json_string_unescape(start, p - start, &token->s)
439 ? LEX_T_STRING : LEX_T_ERROR);
440 return p + 1;
441
442 case '\\':
443 p++;
444 if (*p) {
445 p++;
446 }
447 break;
448
449 default:
450 p++;
451 break;
452 }
453 }
454}
455
/* Returns true if 'c' may begin an identifier: an ASCII letter, '_' or '.'. */
static bool
lex_is_id1(unsigned char c)
{
    if (c == '_' || c == '.') {
        return true;
    }

    bool is_lower = c >= 'a' && c <= 'z';
    bool is_upper = c >= 'A' && c <= 'Z';
    return is_lower || is_upper;
}
462
/* Returns true if 'c' may appear in an identifier after its first character:
 * anything lex_is_id1() accepts, or a decimal digit. */
static bool
lex_is_idn(unsigned char c)
{
    bool is_digit = c >= '0' && c <= '9';

    return is_digit || lex_is_id1(c);
}
468
469static const char *
470lex_parse_id(const char *p, struct lex_token *token)
471{
472 const char *start = p;
473
474 do {
475 p++;
476 } while (lex_is_idn(*p));
477
478 token->type = LEX_T_ID;
479 token->s = xmemdup0(start, p - start);
480 return p;
481}
482
483/* Initializes 'token' and parses the first token from the beginning of
484 * null-terminated string 'p' into 'token'. Stores a pointer to the start of
485 * the token (after skipping white space and comments, if any) into '*startp'.
486 * Returns the character position at which to begin parsing the next token. */
487const char *
488lex_token_parse(struct lex_token *token, const char *p, const char **startp)
489{
490 lex_token_init(token);
491
492next:
493 *startp = p;
494 switch (*p) {
495 case '\0':
496 token->type = LEX_T_END;
497 return p;
498
499 case ' ': case '\t': case '\n': case '\r':
500 p++;
501 goto next;
502
503 case '/':
504 p++;
505 if (*p == '/') {
506 do {
507 p++;
508 } while (*p != '\0' && *p != '\n');
509 goto next;
510 } else if (*p == '*') {
511 p++;
512 for (;;) {
513 if (*p == '*' && p[1] == '/') {
514 p += 2;
515 goto next;
516 } else if (*p == '\0' || *p == '\n') {
517 lex_error(token, "`/*' without matching `*/'.");
518 return p;
519 } else {
520 p++;
521 }
522 }
523 goto next;
524 } else {
525 lex_error(token,
526 "`/' is only valid as part of `//' or `/*'.");
527 }
528 break;
529
530 case '(':
531 token->type = LEX_T_LPAREN;
532 p++;
533 break;
534
535 case ')':
536 token->type = LEX_T_RPAREN;
537 p++;
538 break;
539
540 case '{':
541 token->type = LEX_T_LCURLY;
542 p++;
543 break;
544
545 case '}':
546 token->type = LEX_T_RCURLY;
547 p++;
548 break;
549
550 case '[':
551 token->type = LEX_T_LSQUARE;
552 p++;
553 break;
554
555 case ']':
556 token->type = LEX_T_RSQUARE;
557 p++;
558 break;
559
560 case '=':
561 p++;
562 if (*p == '=') {
563 token->type = LEX_T_EQ;
564 p++;
565 } else {
566 token->type = LEX_T_EQUALS;
567 }
568 break;
569
570 case '!':
571 p++;
572 if (*p == '=') {
573 token->type = LEX_T_NE;
574 p++;
575 } else {
576 token->type = LEX_T_LOG_NOT;
577 }
578 break;
579
580 case '&':
581 p++;
582 if (*p == '&') {
583 token->type = LEX_T_LOG_AND;
584 p++;
585 } else {
586 lex_error(token, "`&' is only valid as part of `&&'.");
587 }
588 break;
589
590 case '|':
591 p++;
592 if (*p == '|') {
593 token->type = LEX_T_LOG_OR;
594 p++;
595 } else {
596 lex_error(token, "`|' is only valid as part of `||'.");
597 }
598 break;
599
600 case '<':
601 p++;
602 if (*p == '=') {
603 token->type = LEX_T_LE;
604 p++;
a20c96c6
BP
605 } else if (*p == '-' && p[1] == '>') {
606 token->type = LEX_T_EXCHANGE;
607 p += 2;
10b1662b
BP
608 } else {
609 token->type = LEX_T_LT;
610 }
611 break;
612
613 case '>':
614 p++;
615 if (*p == '=') {
616 token->type = LEX_T_GE;
617 p++;
618 } else {
619 token->type = LEX_T_GT;
620 }
621 break;
622
623 case '.':
624 p++;
625 if (*p == '.') {
626 token->type = LEX_T_ELLIPSIS;
627 p++;
628 } else {
629 lex_error(token, "`.' is only valid as part of `..' or a number.");
630 }
631 break;
632
633 case ',':
634 p++;
635 token->type = LEX_T_COMMA;
636 break;
637
638 case ';':
639 p++;
640 token->type = LEX_T_SEMICOLON;
641 break;
642
56091efe
BP
643 case '-':
644 p++;
645 if (*p == '-') {
646 token->type = LEX_T_DECREMENT;
647 p++;
648 } else {
649 lex_error(token, "`-' is only valid as part of `--'.");
650 }
651 break;
652
10b1662b
BP
653 case '0': case '1': case '2': case '3': case '4':
654 case '5': case '6': case '7': case '8': case '9':
655 case ':':
656 p = lex_parse_integer(p, token);
657 break;
658
659 case '"':
660 p = lex_parse_string(p, token);
661 break;
662
663 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
664 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
665 /* We need to distinguish an Ethernet address or IPv6 address from an
666 * identifier. Fortunately, Ethernet addresses and IPv6 addresses that
667 * are ambiguous based on the first character, always start with hex
668 * digits followed by a colon, but identifiers never do. */
669 p = (p[strspn(p, "0123456789abcdefABCDEF")] == ':'
670 ? lex_parse_integer(p, token)
671 : lex_parse_id(p, token));
672 break;
673
674 default:
675 if (lex_is_id1(*p)) {
676 p = lex_parse_id(p, token);
677 } else {
678 if (isprint((unsigned char) *p)) {
679 lex_error(token, "Invalid character `%c' in input.", *p);
680 } else {
681 lex_error(token, "Invalid byte 0x%d in input.", *p);
682 }
683 p++;
684 }
685 break;
686 }
687
688 return p;
689}
690\f
691/* Initializes 'lexer' for parsing 'input'.
692 *
693 * While the lexer is in use, 'input' must remain available, but the caller
694 * otherwise retains ownership of 'input'.
695 *
696 * The caller must call lexer_get() to obtain the first token. */
697void
698lexer_init(struct lexer *lexer, const char *input)
699{
700 lexer->input = input;
701 lexer->start = NULL;
702 lex_token_init(&lexer->token);
703}
704
705/* Frees storage associated with 'lexer'. */
706void
707lexer_destroy(struct lexer *lexer)
708{
709 lex_token_destroy(&lexer->token);
710}
711
712/* Obtains the next token from 'lexer' into 'lexer->token', and returns the
713 * token's type. The caller may examine 'lexer->token' directly to obtain full
714 * information about the token. */
715enum lex_type
716lexer_get(struct lexer *lexer)
717{
718 lex_token_destroy(&lexer->token);
719 lexer->input = lex_token_parse(&lexer->token, lexer->input, &lexer->start);
720 return lexer->token.type;
721}
722
27912fdb
BP
723/* Returns the type of the next token that will be fetched by lexer_get(),
724 * without advancing 'lexer->token' to that token. */
725enum lex_type
726lexer_lookahead(const struct lexer *lexer)
727{
728 struct lex_token next;
729 enum lex_type type;
730 const char *start;
731
732 lex_token_parse(&next, lexer->input, &start);
733 type = next.type;
734 lex_token_destroy(&next);
735 return type;
736}
737
10b1662b
BP
738/* If 'lexer''s current token has the given 'type', advances 'lexer' to the
739 * next token and returns true. Otherwise returns false. */
740bool
741lexer_match(struct lexer *lexer, enum lex_type type)
742{
743 if (lexer->token.type == type) {
744 lexer_get(lexer);
745 return true;
746 } else {
747 return false;
748 }
749}
27912fdb
BP
750
751/* If 'lexer''s current token is the identifier given in 'id', advances 'lexer'
752 * to the next token and returns true. Otherwise returns false. */
753bool
754lexer_match_id(struct lexer *lexer, const char *id)
755{
756 if (lexer->token.type == LEX_T_ID && !strcmp(lexer->token.s, id)) {
757 lexer_get(lexer);
758 return true;
759 } else {
760 return false;
761 }
762}
558ec83d
BP
763
764bool
765lexer_is_int(const struct lexer *lexer)
766{
767 return (lexer->token.type == LEX_T_INTEGER
768 && lexer->token.format == LEX_F_DECIMAL
769 && ntohll(lexer->token.value.integer) <= INT_MAX);
770}
771
772bool
773lexer_get_int(struct lexer *lexer, int *value)
774{
775 if (lexer_is_int(lexer)) {
776 *value = ntohll(lexer->token.value.integer);
777 lexer_get(lexer);
778 return true;
779 } else {
780 *value = 0;
781 return false;
782 }
783}