]> git.proxmox.com Git - ovs.git/blob - ovn/northd/ovn-northd.c
ovn: Implement basic ARP support for L3 logical routers.
[ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "command-line.h"
22 #include "daemon.h"
23 #include "dirs.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
26 #include "hash.h"
27 #include "hmap.h"
28 #include "json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
33 #include "smap.h"
34 #include "stream.h"
35 #include "stream-ssl.h"
36 #include "unixctl.h"
37 #include "util.h"
38 #include "uuid.h"
39 #include "openvswitch/vlog.h"
40
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* unixctl "exit" command handler (defined later in this file). */
static unixctl_cb_func ovn_northd_exit;

/* State shared by the translation functions: one IDL connection and the
 * current transaction (if any) for each of the two OVN databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;     /* OVN_Northbound connection. */
    struct ovsdb_idl *ovnsb_idl;     /* OVN_Southbound connection. */
    struct ovsdb_idl_txn *ovnnb_txn; /* Northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn; /* Southbound transaction. */
};

/* Database connection strings, taken from the command line (see usage()). */
static const char *ovnnb_db;
static const char *ovnsb_db;

static const char *default_db(void);
56 \f
/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};

/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Bit layout: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
77
/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
 * S_ROUTER_OUT_DELIVERY.
 *
 * Each PIPELINE_STAGE() entry lists: datapath type, pipeline, stage name,
 * table number within the pipeline, and the external string name used in the
 * southbound Logical_Flow table. */
enum ovn_stage {
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC,    0, "ls_in_port_sec")     \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,     1, "ls_in_pre_acl")      \
    PIPELINE_STAGE(SWITCH, IN,  ACL,         2, "ls_in_acl")          \
    PIPELINE_STAGE(SWITCH, IN,  ARP_RSP,     3, "ls_in_arp_rsp")      \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,     4, "ls_in_l2_lkup")      \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,     0, "ls_out_pre_acl")     \
    PIPELINE_STAGE(SWITCH, OUT, ACL,         1, "ls_out_acl")         \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,    2, "ls_out_port_sec")    \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "lr_in_ip_routing")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 3, "lr_in_arp_resolve")  \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 4, "lr_in_arp_request")  \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
115
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
121
122 /* Returns an "enum ovn_stage" built from the arguments. */
123 static enum ovn_stage
124 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
125 uint8_t table)
126 {
127 return OVN_STAGE_BUILD(dp_type, pipeline, table);
128 }
129
130 /* Returns the pipeline to which 'stage' belongs. */
131 static enum ovn_pipeline
132 ovn_stage_get_pipeline(enum ovn_stage stage)
133 {
134 return (stage >> 8) & 1;
135 }
136
137 /* Returns the table to which 'stage' belongs. */
138 static uint8_t
139 ovn_stage_get_table(enum ovn_stage stage)
140 {
141 return stage & 0xff;
142 }
143
/* Returns a string name for 'stage', e.g. "ls_in_port_sec".  The names come
 * from the NAME column of the PIPELINE_STAGES table above.  Returns
 * "<unknown>" for a value that does not match any defined stage. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
156 \f
/* Prints a usage message for this program to stdout, followed by the usage
 * text of the shared daemon/vlog/stream option groups. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_db(), default_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
177 \f
/* One allocated tunnel ID in a set of in-use IDs (see allocate_tnlid()). */
struct tnlid_node {
    struct hmap_node hmap_node;  /* In a hmap keyed on hash_int(tnlid, 0). */
    uint32_t tnlid;              /* The tunnel ID itself. */
};
182
183 static void
184 destroy_tnlids(struct hmap *tnlids)
185 {
186 struct tnlid_node *node, *next;
187 HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
188 hmap_remove(tnlids, &node->hmap_node);
189 free(node);
190 }
191 hmap_destroy(tnlids);
192 }
193
194 static void
195 add_tnlid(struct hmap *set, uint32_t tnlid)
196 {
197 struct tnlid_node *node = xmalloc(sizeof *node);
198 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
199 node->tnlid = tnlid;
200 }
201
202 static bool
203 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
204 {
205 const struct tnlid_node *node;
206 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
207 if (node->tnlid == tnlid) {
208 return true;
209 }
210 }
211 return false;
212 }
213
214 static uint32_t
215 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
216 uint32_t *hint)
217 {
218 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
219 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
220 if (!tnlid_in_use(set, tnlid)) {
221 add_tnlid(set, tnlid);
222 *hint = tnlid;
223 return tnlid;
224 }
225 }
226
227 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
228 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
229 return 0;
230 }
231 \f
/* A logical datapath: the join of a northbound Logical_Switch or
 * Logical_Router row with its southbound Datapath_Binding row.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical router data (digested from nbr). */
    const struct ovn_port *gateway_port; /* Port that routes to 'gateway',
                                          * or NULL (see join_datapaths()). */
    ovs_be32 gateway;                    /* Parsed nbr->default_gw, or 0. */

    /* Logical switch data. */
    struct ovn_port **router_ports;      /* Ports of type "router" whose
                                          * peers were found. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;     /* Hint for ovn_port_allocate_key(). */

    bool has_unknown;
};
257
258 static struct ovn_datapath *
259 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
260 const struct nbrec_logical_switch *nbs,
261 const struct nbrec_logical_router *nbr,
262 const struct sbrec_datapath_binding *sb)
263 {
264 struct ovn_datapath *od = xzalloc(sizeof *od);
265 od->key = *key;
266 od->sb = sb;
267 od->nbs = nbs;
268 od->nbr = nbr;
269 hmap_init(&od->port_tnlids);
270 od->port_key_hint = 0;
271 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
272 return od;
273 }
274
275 static void
276 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
277 {
278 if (od) {
279 /* Don't remove od->list. It is used within build_datapaths() as a
280 * private list and once we've exited that function it is not safe to
281 * use it. */
282 hmap_remove(datapaths, &od->key_node);
283 destroy_tnlids(&od->port_tnlids);
284 free(od->router_ports);
285 free(od);
286 }
287 }
288
289 static struct ovn_datapath *
290 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
291 {
292 struct ovn_datapath *od;
293
294 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
295 if (uuid_equals(uuid, &od->key)) {
296 return od;
297 }
298 }
299 return NULL;
300 }
301
302 static struct ovn_datapath *
303 ovn_datapath_from_sbrec(struct hmap *datapaths,
304 const struct sbrec_datapath_binding *sb)
305 {
306 struct uuid key;
307
308 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
309 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
310 return NULL;
311 }
312 return ovn_datapath_find(datapaths, &key);
313 }
314
/* Pairs up northbound logical switches and routers with southbound
 * Datapath_Binding rows.  Initializes 'datapaths' with an ovn_datapath for
 * every row seen, and sorts them into three lists: rows only in the
 * southbound database ('sb_only'), only in the northbound database
 * ('nb_only'), and present in both ('both').  Southbound rows with missing
 * or duplicate external-ids keys are deleted here.  Also digests each
 * router's default gateway into od->gateway. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    list_init(sb_only);
    list_init(nb_only);
    list_init(both);

    /* Pass 1: one ovn_datapath per valid southbound row, provisionally on
     * 'sb_only'; the northbound passes below move matches to 'both'. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            /* No back-pointer to a northbound row: garbage, delete it. */
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            /* Two southbound rows claim the same northbound row; keep the
             * first, delete this one. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        list_push_back(sb_only, &od->list);
    }

    /* Pass 2: northbound logical switches. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            list_remove(&od->list);
            list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            list_push_back(nb_only, &od->list);
        }
    }

    /* Pass 3: northbound logical routers. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                list_remove(&od->list);
                list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and a router would need the same
                 * northbound row UUID. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            list_push_back(nb_only, &od->list);
        }

        /* Digest the default gateway; 0 means "no gateway". */
        od->gateway = 0;
        if (nbr->default_gw) {
            ovs_be32 ip;
            if (!ip_parse(nbr->default_gw, &ip) || !ip) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
            } else {
                od->gateway = ip;
            }
        }

        /* Set the gateway port to NULL.  If there is a gateway, it will get
         * filled in as we go through the ports later. */
        od->gateway_port = NULL;
    }
}
409
/* Allocates a fresh datapath tunnel key.  Datapath keys are 24-bit;
 * returns 0 when the space is exhausted. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    const uint32_t max_key = (1u << 24) - 1;
    static uint32_t hint;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
416
417 /* Updates the southbound Datapath_Binding table so that it contains the
418 * logical switches and routers specified by the northbound database.
419 *
420 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
421 * switch and router. */
422 static void
423 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
424 {
425 struct ovs_list sb_only, nb_only, both;
426
427 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
428
429 if (!list_is_empty(&nb_only)) {
430 /* First index the in-use datapath tunnel IDs. */
431 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
432 struct ovn_datapath *od;
433 LIST_FOR_EACH (od, list, &both) {
434 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
435 }
436
437 /* Add southbound record for each unmatched northbound record. */
438 LIST_FOR_EACH (od, list, &nb_only) {
439 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
440 if (!tunnel_key) {
441 break;
442 }
443
444 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
445
446 char uuid_s[UUID_LEN + 1];
447 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
448 const char *key = od->nbs ? "logical-switch" : "logical-router";
449 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
450 sbrec_datapath_binding_set_external_ids(od->sb, &id);
451
452 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
453 }
454 destroy_tnlids(&dp_tnlids);
455 }
456
457 /* Delete southbound records without northbound matches. */
458 struct ovn_datapath *od, *next;
459 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
460 list_remove(&od->list);
461 sbrec_datapath_binding_delete(od->sb);
462 ovn_datapath_destroy(datapaths, od);
463 }
464 }
465 \f
/* A logical port: the join of a northbound Logical_Port or
 * Logical_Router_Port row with its southbound Port_Binding row. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct nbrec_logical_port *nbs;        /* May be NULL. */
    const struct nbrec_logical_router_port *nbr; /* May be NULL. */
    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical router port data (digested from nbr->network and nbr->mac
     * in join_logical_ports()). */
    ovs_be32 ip, mask;          /* 192.168.10.123/24. */
    ovs_be32 network;           /* 192.168.10.0. */
    ovs_be32 bcast;             /* 192.168.10.255. */
    struct eth_addr mac;
    struct ovn_port *peer;      /* The connected port, or NULL. */

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
486
487 static struct ovn_port *
488 ovn_port_create(struct hmap *ports, const char *key,
489 const struct nbrec_logical_port *nbs,
490 const struct nbrec_logical_router_port *nbr,
491 const struct sbrec_port_binding *sb)
492 {
493 struct ovn_port *op = xzalloc(sizeof *op);
494
495 struct ds json_key = DS_EMPTY_INITIALIZER;
496 json_string_escape(key, &json_key);
497 op->json_key = ds_steal_cstr(&json_key);
498
499 op->key = xstrdup(key);
500 op->sb = sb;
501 op->nbs = nbs;
502 op->nbr = nbr;
503 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
504 return op;
505 }
506
507 static void
508 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
509 {
510 if (port) {
511 /* Don't remove port->list. It is used within build_ports() as a
512 * private list and once we've exited that function it is not safe to
513 * use it. */
514 hmap_remove(ports, &port->key_node);
515 free(port->json_key);
516 free(port->key);
517 free(port);
518 }
519 }
520
521 static struct ovn_port *
522 ovn_port_find(struct hmap *ports, const char *name)
523 {
524 struct ovn_port *op;
525
526 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
527 if (!strcmp(op->key, name)) {
528 return op;
529 }
530 }
531 return NULL;
532 }
533
534 static uint32_t
535 ovn_port_allocate_key(struct ovn_datapath *od)
536 {
537 return allocate_tnlid(&od->port_tnlids, "port",
538 (1u << 15) - 1, &od->port_key_hint);
539 }
540
541 static void
542 join_logical_ports(struct northd_context *ctx,
543 struct hmap *datapaths, struct hmap *ports,
544 struct ovs_list *sb_only, struct ovs_list *nb_only,
545 struct ovs_list *both)
546 {
547 hmap_init(ports);
548 list_init(sb_only);
549 list_init(nb_only);
550 list_init(both);
551
552 const struct sbrec_port_binding *sb;
553 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
554 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
555 NULL, NULL, sb);
556 list_push_back(sb_only, &op->list);
557 }
558
559 struct ovn_datapath *od;
560 HMAP_FOR_EACH (od, key_node, datapaths) {
561 if (od->nbs) {
562 for (size_t i = 0; i < od->nbs->n_ports; i++) {
563 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
564 struct ovn_port *op = ovn_port_find(ports, nbs->name);
565 if (op) {
566 if (op->nbs || op->nbr) {
567 static struct vlog_rate_limit rl
568 = VLOG_RATE_LIMIT_INIT(5, 1);
569 VLOG_WARN_RL(&rl, "duplicate logical port %s",
570 nbs->name);
571 continue;
572 }
573 op->nbs = nbs;
574 list_remove(&op->list);
575 list_push_back(both, &op->list);
576 } else {
577 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
578 list_push_back(nb_only, &op->list);
579 }
580
581 op->od = od;
582 }
583 } else {
584 for (size_t i = 0; i < od->nbr->n_ports; i++) {
585 const struct nbrec_logical_router_port *nbr
586 = od->nbr->ports[i];
587
588 struct eth_addr mac;
589 if (!eth_addr_from_string(nbr->mac, &mac)) {
590 static struct vlog_rate_limit rl
591 = VLOG_RATE_LIMIT_INIT(5, 1);
592 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
593 continue;
594 }
595
596 ovs_be32 ip, mask;
597 char *error = ip_parse_masked(nbr->network, &ip, &mask);
598 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
599 static struct vlog_rate_limit rl
600 = VLOG_RATE_LIMIT_INIT(5, 1);
601 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
602 free(error);
603 continue;
604 }
605
606 struct ovn_port *op = ovn_port_find(ports, nbr->name);
607 if (op) {
608 if (op->nbs || op->nbr) {
609 static struct vlog_rate_limit rl
610 = VLOG_RATE_LIMIT_INIT(5, 1);
611 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
612 nbr->name);
613 continue;
614 }
615 op->nbr = nbr;
616 list_remove(&op->list);
617 list_push_back(both, &op->list);
618 } else {
619 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
620 list_push_back(nb_only, &op->list);
621 }
622
623 op->ip = ip;
624 op->mask = mask;
625 op->network = ip & mask;
626 op->bcast = ip | ~mask;
627 op->mac = mac;
628
629 op->od = od;
630
631 /* If 'od' has a gateway and 'op' routes to it... */
632 if (od->gateway && !((op->network ^ od->gateway) & op->mask)) {
633 /* ...and if 'op' is a longer match than the current
634 * choice... */
635 const struct ovn_port *gw = od->gateway_port;
636 int len = gw ? ip_count_cidr_bits(gw->mask) : 0;
637 if (ip_count_cidr_bits(op->mask) > len) {
638 /* ...then it's the default gateway port. */
639 od->gateway_port = op;
640 }
641 }
642 }
643 }
644 }
645
646 /* Connect logical router ports, and logical switch ports of type "router",
647 * to their peers. */
648 struct ovn_port *op;
649 HMAP_FOR_EACH (op, key_node, ports) {
650 if (op->nbs && !strcmp(op->nbs->type, "router")) {
651 const char *peer_name = smap_get(&op->nbs->options, "router-port");
652 if (!peer_name) {
653 continue;
654 }
655
656 struct ovn_port *peer = ovn_port_find(ports, peer_name);
657 if (!peer || !peer->nbr) {
658 continue;
659 }
660
661 peer->peer = op;
662 op->peer = peer;
663 op->od->router_ports = xrealloc(
664 op->od->router_ports,
665 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
666 op->od->router_ports[op->od->n_router_ports++] = op;
667 } else if (op->nbr && op->nbr->peer) {
668 op->peer = ovn_port_find(ports, op->nbr->name);
669 }
670 }
671 }
672
/* Writes 'op's digested northbound configuration into its southbound
 * Port_Binding row: datapath, type, options, parent/tag, and MAC addresses.
 * 'op->sb' and 'op->od->sb' must be nonnull. */
static void
ovn_port_update_sbrec(const struct ovn_port *op)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbr) {
        /* Logical router port: realized as a patch port whose only option
         * is the name of its peer. */
        sbrec_port_binding_set_type(op->sb, "patch");

        const char *peer = op->peer ? op->peer->key : "<error>";
        const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
        sbrec_port_binding_set_options(op->sb, &ids);

        /* Router ports have no parent, tag, or MAC list. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbs->type, "router")) {
            /* Ordinary switch port: copy type and options through. */
            sbrec_port_binding_set_type(op->sb, op->nbs->type);
            sbrec_port_binding_set_options(op->sb, &op->nbs->options);
        } else {
            /* Switch port of type "router": realized as a patch port whose
             * peer is named by options:router-port. */
            sbrec_port_binding_set_type(op->sb, "patch");

            const char *router_port = smap_get(&op->nbs->options,
                                               "router-port");
            if (!router_port) {
                router_port = "<error>";
            }
            const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
            sbrec_port_binding_set_options(op->sb, &ids);
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
                                   op->nbs->n_addresses);
    }
}
708
/* Updates the southbound Port_Binding table so that it contains the logical
 * ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx, struct hmap *datapaths,
            struct hmap *ports)
{
    struct ovs_list sb_only, nb_only, both;

    join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);

    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data.  Also index the in-use tunnel_keys. */
    struct ovn_port *op, *next;
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        ovn_port_update_sbrec(op);

        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record.
     * (Port keys are 15-bit, so uint16_t cannot truncate here.) */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            /* Key space on this datapath exhausted; skip this port. */
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(op);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
}
756 \f
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group and its tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Predefined multicast groups at the top of the key range. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Returns true if 'a' and 'b' have equal keys and names. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    return a->key == b->key && !strcmp(a->name, b->name);
}
777
/* Multicast group entry: the set of ports on one datapath that belong to
 * one multicast group. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    struct ovn_port **ports;            /* Member ports. */
    size_t n_ports, allocated_ports;    /* Used/allocated slots in 'ports'. */
};
787
788 static uint32_t
789 ovn_multicast_hash(const struct ovn_datapath *datapath,
790 const struct multicast_group *group)
791 {
792 return hash_pointer(datapath, group->key);
793 }
794
795 static struct ovn_multicast *
796 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
797 const struct multicast_group *group)
798 {
799 struct ovn_multicast *mc;
800
801 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
802 ovn_multicast_hash(datapath, group), mcgroups) {
803 if (mc->datapath == datapath
804 && multicast_group_equal(mc->group, group)) {
805 return mc;
806 }
807 }
808 return NULL;
809 }
810
811 static void
812 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
813 struct ovn_port *port)
814 {
815 struct ovn_datapath *od = port->od;
816 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
817 if (!mc) {
818 mc = xmalloc(sizeof *mc);
819 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
820 mc->datapath = od;
821 mc->group = group;
822 mc->n_ports = 0;
823 mc->allocated_ports = 4;
824 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
825 }
826 if (mc->n_ports >= mc->allocated_ports) {
827 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
828 sizeof *mc->ports);
829 }
830 mc->ports[mc->n_ports++] = port;
831 }
832
833 static void
834 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
835 {
836 if (mc) {
837 hmap_remove(mcgroups, &mc->hmap_node);
838 free(mc->ports);
839 free(mc);
840 }
841 }
842
/* Copies 'mc''s membership into the southbound Multicast_Group row 'sb' by
 * setting its ports column to the Port_Binding rows of 'mc''s members. */
static void
ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
                           const struct sbrec_multicast_group *sb)
{
    /* Build a temporary array of the members' southbound rows; the IDL
     * setter copies it, so it is freed immediately afterward. */
    struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
    for (size_t i = 0; i < mc->n_ports; i++) {
        ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
    }
    sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
    free(ports);
}
854 \f
/* Logical flow generation.
 *
 * This code generates the Logical_Flow table in the southbound database, as a
 * function of most of the northbound database.
 */

/* One logical flow: a (datapath, stage, priority, match, actions) tuple
 * destined for the southbound Logical_Flow table. */
struct ovn_lflow {
    struct hmap_node hmap_node;     /* Hashed on all fields below. */

    struct ovn_datapath *od;        /* Datapath the flow belongs to. */
    enum ovn_stage stage;           /* Pipeline stage (encodes the table). */
    uint16_t priority;
    char *match;                    /* Owned; freed by ovn_lflow_destroy(). */
    char *actions;                  /* Owned; freed by ovn_lflow_destroy(). */
};
870
871 static size_t
872 ovn_lflow_hash(const struct ovn_lflow *lflow)
873 {
874 size_t hash = uuid_hash(&lflow->od->key);
875 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
876 hash = hash_string(lflow->match, hash);
877 return hash_string(lflow->actions, hash);
878 }
879
880 static bool
881 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
882 {
883 return (a->od == b->od
884 && a->stage == b->stage
885 && a->priority == b->priority
886 && !strcmp(a->match, b->match)
887 && !strcmp(a->actions, b->actions));
888 }
889
890 static void
891 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
892 enum ovn_stage stage, uint16_t priority,
893 char *match, char *actions)
894 {
895 lflow->od = od;
896 lflow->stage = stage;
897 lflow->priority = priority;
898 lflow->match = match;
899 lflow->actions = actions;
900 }
901
902 /* Adds a row with the specified contents to the Logical_Flow table. */
903 static void
904 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
905 enum ovn_stage stage, uint16_t priority,
906 const char *match, const char *actions)
907 {
908 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
909 ovn_lflow_init(lflow, od, stage, priority,
910 xstrdup(match), xstrdup(actions));
911 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
912 }
913
914 static struct ovn_lflow *
915 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
916 enum ovn_stage stage, uint16_t priority,
917 const char *match, const char *actions)
918 {
919 struct ovn_lflow target;
920 ovn_lflow_init(&target, od, stage, priority,
921 CONST_CAST(char *, match), CONST_CAST(char *, actions));
922
923 struct ovn_lflow *lflow;
924 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
925 lflows) {
926 if (ovn_lflow_equal(lflow, &target)) {
927 return lflow;
928 }
929 }
930 return NULL;
931 }
932
933 static void
934 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
935 {
936 if (lflow) {
937 hmap_remove(lflows, &lflow->hmap_node);
938 free(lflow->match);
939 free(lflow->actions);
940 free(lflow);
941 }
942 }
943
/* An IPv4 address with prefix length, e.g. 10.0.0.4/24. */
struct ipv4_netaddr {
    ovs_be32 addr;
    unsigned int plen;          /* Prefix length. */
};

/* An IPv6 address with prefix length. */
struct ipv6_netaddr {
    struct in6_addr addr;
    unsigned int plen;          /* Prefix length. */
};

/* The parsed form of a logical port "addresses" string: one MAC plus any
 * number of IPv4/IPv6 addresses (see extract_lport_addresses()). */
struct lport_addresses {
    struct eth_addr ea;
    size_t n_ipv4_addrs;
    struct ipv4_netaddr *ipv4_addrs;    /* Malloc'd; caller frees. */
    size_t n_ipv6_addrs;
    struct ipv6_netaddr *ipv6_addrs;    /* Malloc'd; caller frees. */
};
961
/*
 * Extracts the mac, ipv4 and ipv6 addresses from the input param 'address'
 * which should be of the format 'MAC [IP1 IP2 ..]" where IPn should be
 * a valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
 * 'ipv6_addrs' fields of input param 'laddrs'.
 * The caller has to free the 'ipv4_addrs' and 'ipv6_addrs' fields.
 * If input param 'store_ipv6' is true only then extracted ipv6 addresses
 * are stored in 'ipv6_addrs' fields.
 * Return true if at least 'MAC' is found in 'address', false otherwise.
 * Eg 1.
 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
 * 30.0.0.3/23' and 'store_ipv6' = true
 * then returns true with laddrs->n_ipv4_addrs = 2, laddrs->n_ipv6_addrs = 1.
 *
 * Eg. 2
 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
 * 30.0.0.3/23' and 'store_ipv6' = false
 * then returns true with laddrs->n_ipv4_addrs = 2, laddrs->n_ipv6_addrs = 0.
 *
 * Eg 3. If 'address' = '00:00:00:00:00:01 10.0.0.4 addr 30.0.0.4', then
 * returns true with laddrs->n_ipv4_addrs = 1 and laddrs->n_ipv6_addrs = 0.
 */
static bool
extract_lport_addresses(char *address, struct lport_addresses *laddrs,
                        bool store_ipv6)
{
    char *buf = address;
    int buf_index = 0;
    char *buf_end = buf + strlen(address);
    /* The MAC must come first; without it the whole string is rejected. */
    if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
                      ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
        return false;
    }

    ovs_be32 ip4;
    struct in6_addr ip6;
    unsigned int plen;
    char *error;

    laddrs->n_ipv4_addrs = 0;
    laddrs->n_ipv6_addrs = 0;
    laddrs->ipv4_addrs = NULL;
    laddrs->ipv6_addrs = NULL;

    /* Loop through the buffer and extract the IPv4/IPv6 addresses
     * and store in the 'laddrs'.  Break the loop if invalid data is found.
     */
    buf += buf_index;
    while (buf < buf_end) {
        buf_index = 0;
        /* Try IPv4 first; on success append and move to the next token. */
        error = ip_parse_cidr_len(buf, &buf_index, &ip4, &plen);
        if (!error) {
            laddrs->n_ipv4_addrs++;
            laddrs->ipv4_addrs = xrealloc(
                laddrs->ipv4_addrs,
                sizeof (struct ipv4_netaddr) * laddrs->n_ipv4_addrs);
            laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].addr = ip4;
            laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].plen = plen;
            buf += buf_index;
            continue;
        }
        free(error);
        /* Not IPv4; try IPv6.  Even when 'store_ipv6' is false the token is
         * still parsed (and skipped) so later tokens are reached. */
        error = ipv6_parse_cidr_len(buf, &buf_index, &ip6, &plen);
        if (!error && store_ipv6) {
            laddrs->n_ipv6_addrs++;
            laddrs->ipv6_addrs = xrealloc(
                laddrs->ipv6_addrs,
                sizeof(struct ipv6_netaddr) * laddrs->n_ipv6_addrs);
            memcpy(&laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].addr, &ip6,
                   sizeof(struct in6_addr));
            laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].plen = plen;
        }

        if (error) {
            /* Token is neither IPv4 nor IPv6: log and stop parsing, but the
             * addresses gathered so far are still returned (see Eg 3). */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
            free(error);
            break;
        }
        buf += buf_index;
    }

    return true;
}
1046
1047 /* Appends port security constraints on L2 address field 'eth_addr_field'
1048 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
1049 * 'n_port_security' elements, is the collection of port_security constraints
1050 * from an OVN_NB Logical_Port row. */
1051 static void
1052 build_port_security(const char *eth_addr_field,
1053 char **port_security, size_t n_port_security,
1054 struct ds *match)
1055 {
1056 size_t base_len = match->length;
1057 ds_put_format(match, " && %s == {", eth_addr_field);
1058
1059 size_t n = 0;
1060 for (size_t i = 0; i < n_port_security; i++) {
1061 struct eth_addr ea;
1062
1063 if (eth_addr_from_string(port_security[i], &ea)) {
1064 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
1065 ds_put_char(match, ' ');
1066 n++;
1067 }
1068 }
1069 ds_chomp(match, ' ');
1070 ds_put_cstr(match, "}");
1071
1072 if (!n) {
1073 match->length = base_len;
1074 }
1075 }
1076
1077 static bool
1078 lport_is_enabled(const struct nbrec_logical_port *lport)
1079 {
1080 return !lport->enabled || *lport->enabled;
1081 }
1082
1083 static bool
1084 lport_is_up(const struct nbrec_logical_port *lport)
1085 {
1086 return !lport->up || *lport->up;
1087 }
1088
1089 static bool
1090 has_stateful_acl(struct ovn_datapath *od)
1091 {
1092 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1093 struct nbrec_acl *acl = od->nbs->acls[i];
1094 if (!strcmp(acl->action, "allow-related")) {
1095 return true;
1096 }
1097 }
1098
1099 return false;
1100 }
1101
1102 static void
1103 build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
1104 {
1105 bool has_stateful = has_stateful_acl(od);
1106 struct ovn_port *op;
1107 struct ds match_in, match_out;
1108
1109 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1110 * allowed by default. */
1111 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1112 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
1113
1114 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1115 * default. A related rule at priority 1 is added below if there
1116 * are any stateful ACLs in this datapath. */
1117 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1118 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1119
1120 /* If there are any stateful ACL rules in this dapapath, we must
1121 * send all IP packets through the conntrack action, which handles
1122 * defragmentation, in order to match L4 headers. */
1123 if (has_stateful) {
1124 HMAP_FOR_EACH (op, key_node, ports) {
1125 if (op->od == od && !strcmp(op->nbs->type, "router")) {
1126 /* Can't use ct() for router ports. Consider the following configuration:
1127 lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB,
1128 For a ping from lp1 to lp2, First, the response will go through ct()
1129 with a zone for lp2 in the ls2 ingress pipeline on hostB.
1130 That ct zone knows about this connection. Next, it goes through ct()
1131 with the zone for the router port in the egress pipeline of ls2 on hostB.
1132 This zone does not know about the connection, as the icmp request
1133 went through the logical router on hostA, not hostB. This would only work
1134 with distributed conntrack state across all chassis. */
1135
1136 ds_init(&match_in);
1137 ds_init(&match_out);
1138 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1139 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
1140 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ds_cstr(&match_in), "next;");
1141 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match_out), "next;");
1142
1143 ds_destroy(&match_in);
1144 ds_destroy(&match_out);
1145 }
1146 }
1147
1148 /* Ingress and Egress Pre-ACL Table (Priority 100).
1149 *
1150 * Regardless of whether the ACL is "from-lport" or "to-lport",
1151 * we need rules in both the ingress and egress table, because
1152 * the return traffic needs to be followed. */
1153 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
1154 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
1155
1156 /* Ingress and Egress ACL Table (Priority 1).
1157 *
1158 * By default, traffic is allowed. This is partially handled by
1159 * the Priority 0 ACL flows added earlier, but we also need to
1160 * commit IP flows. This is because, while the initiater's
1161 * direction may not have any stateful rules, the server's may
1162 * and then its return traffic would not have an associated
1163 * conntrack entry and would return "+invalid". */
1164 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1165 "ct_commit; next;");
1166 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1167 "ct_commit; next;");
1168
1169 /* Ingress and Egress ACL Table (Priority 65535).
1170 *
1171 * Always drop traffic that's in an invalid state. This is
1172 * enforced at a higher priority than ACLs can be defined. */
1173 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1174 "ct.inv", "drop;");
1175 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1176 "ct.inv", "drop;");
1177
1178 /* Ingress and Egress ACL Table (Priority 65535).
1179 *
1180 * Always allow traffic that is established to a committed
1181 * conntrack entry. This is enforced at a higher priority than
1182 * ACLs can be defined. */
1183 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1184 "ct.est && !ct.rel && !ct.new && !ct.inv",
1185 "next;");
1186 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1187 "ct.est && !ct.rel && !ct.new && !ct.inv",
1188 "next;");
1189
1190 /* Ingress and Egress ACL Table (Priority 65535).
1191 *
1192 * Always allow traffic that is related to an existing conntrack
1193 * entry. This is enforced at a higher priority than ACLs can
1194 * be defined.
1195 *
1196 * NOTE: This does not support related data sessions (eg,
1197 * a dynamically negotiated FTP data channel), but will allow
1198 * related traffic such as an ICMP Port Unreachable through
1199 * that's generated from a non-listening UDP port. */
1200 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1201 "!ct.est && ct.rel && !ct.new && !ct.inv",
1202 "next;");
1203 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1204 "!ct.est && ct.rel && !ct.new && !ct.inv",
1205 "next;");
1206 }
1207
1208 /* Ingress or Egress ACL Table (Various priorities). */
1209 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1210 struct nbrec_acl *acl = od->nbs->acls[i];
1211 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1212 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1213
1214 if (!strcmp(acl->action, "allow")) {
1215 /* If there are any stateful flows, we must even commit "allow"
1216 * actions. This is because, while the initiater's
1217 * direction may not have any stateful rules, the server's
1218 * may and then its return traffic would not have an
1219 * associated conntrack entry and would return "+invalid". */
1220 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1221 ovn_lflow_add(lflows, od, stage,
1222 acl->priority + OVN_ACL_PRI_OFFSET,
1223 acl->match, actions);
1224 } else if (!strcmp(acl->action, "allow-related")) {
1225 struct ds match = DS_EMPTY_INITIALIZER;
1226
1227 /* Commit the connection tracking entry, which allows all
1228 * other traffic related to this entry to flow due to the
1229 * 65535 priority flow defined earlier. */
1230 ds_put_format(&match, "ct.new && (%s)", acl->match);
1231 ovn_lflow_add(lflows, od, stage,
1232 acl->priority + OVN_ACL_PRI_OFFSET,
1233 ds_cstr(&match), "ct_commit; next;");
1234
1235 ds_destroy(&match);
1236 } else if (!strcmp(acl->action, "drop")) {
1237 ovn_lflow_add(lflows, od, stage,
1238 acl->priority + OVN_ACL_PRI_OFFSET,
1239 acl->match, "drop;");
1240 } else if (!strcmp(acl->action, "reject")) {
1241 /* xxx Need to support "reject". */
1242 VLOG_INFO("reject is not a supported action");
1243 ovn_lflow_add(lflows, od, stage,
1244 acl->priority + OVN_ACL_PRI_OFFSET,
1245 acl->match, "drop;");
1246 }
1247 }
1248 }
1249
/* Builds the logical flow tables and multicast groups for every OVN
 * logical switch in 'datapaths'/'ports', appending flows to 'lflows'
 * and multicast groups to 'mcgroups'.  Logical-router datapaths and
 * ports (those with no 'nbs' record) are skipped throughout; they are
 * handled by build_lrouter_flows(). */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 1 and 2.  Egress tables 0 and 1. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        build_acls(od, lflows, ports);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security (priority 50). */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        if (!lport_is_enabled(op->nbs)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        /* build_port_security() leaves 'match' unchanged if the port
         * has no parseable port_security addresses, so such a port gets
         * a plain "inport == ..." flow. */
        struct ds match = DS_EMPTY_INITIALIZER;
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security("eth.src",
                            op->nbs->port_security, op->nbs->n_port_security,
                            &match);
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
                      ds_cstr(&match), "next;");
        ds_destroy(&match);
    }

    /* Ingress table 3: ARP responder, skip requests coming from localnet
     * ports (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        if (!strcmp(op->nbs->type, "localnet")) {
            char *match = xasprintf("inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 100,
                          match, "next;");
            free(match);
        }
    }

    /* Ingress table 3: ARP responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        /*
         * Add ARP reply flows if either the
         *  - port is up or
         *  - port type is router
         */
        if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
            continue;
        }

        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct lport_addresses laddrs;
            if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
                                         false)) {
                continue;
            }
            /* One ARP-responder flow per IPv4 address: answer ARP
             * requests for the port's IP with the port's MAC, swapping
             * the packet around and sending it back out the port it
             * arrived on. */
            for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
                char *match = xasprintf(
                    "arp.tpa == "IP_FMT" && arp.op == 1",
                    IP_ARGS(laddrs.ipv4_addrs[j].addr));
                char *actions = xasprintf(
                    "eth.dst = eth.src; "
                    "eth.src = "ETH_ADDR_FMT"; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = "ETH_ADDR_FMT"; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = "IP_FMT"; "
                    "outport = inport; "
                    "inport = \"\"; /* Allow sending out inport. */ "
                    "output;",
                    ETH_ADDR_ARGS(laddrs.ea),
                    ETH_ADDR_ARGS(laddrs.ea),
                    IP_ARGS(laddrs.ipv4_addrs[j].addr));
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 50,
                              match, actions);
                free(match);
                free(actions);
            }

            /* 'store_ipv6' was false above, so laddrs.ipv6_addrs was
             * never allocated; only the IPv4 array needs freeing. */
            free(laddrs.ipv4_addrs);
        }
    }

    /* Ingress table 3: ARP responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_RSP, 0, "1", "next;");
    }

    /* Ingress table 4: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        if (lport_is_enabled(op->nbs)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 4: Destination lookup, unicast handling (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        /* Each entry in the addresses column is either a MAC address
         * (one unicast flow per address) or the literal "unknown"
         * (membership in the unknown-MACs multicast group). */
        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct eth_addr mac;

            if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
                struct ds match, actions;

                ds_init(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_init(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
                ds_destroy(&actions);
                ds_destroy(&match);
            } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
                if (lport_is_enabled(op->nbs)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbs->name, op->nbs->addresses[i]);
            }
        }
    }

    /* Ingress table 4: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress table 2: Egress port security multicast/broadcast (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 2: Egress port security (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        struct ds match = DS_EMPTY_INITIALIZER;
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lport_is_enabled(op->nbs)) {
            build_port_security("eth.dst", op->nbs->port_security,
                                op->nbs->n_port_security, &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
                          ds_cstr(&match), "drop;");
        }

        ds_destroy(&match);
    }
}
1491
1492 static bool
1493 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1494 {
1495 return !lrport->enabled || *lrport->enabled;
1496 }
1497
1498 static void
1499 add_route(struct hmap *lflows, const struct ovn_port *op,
1500 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1501 {
1502 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1503 IP_ARGS(network), IP_ARGS(mask));
1504
1505 struct ds actions = DS_EMPTY_INITIALIZER;
1506 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1507 if (gateway) {
1508 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1509 } else {
1510 ds_put_cstr(&actions, "ip4.dst");
1511 }
1512 ds_put_format(&actions,
1513 "; "
1514 "reg1 = "IP_FMT"; "
1515 "eth.src = "ETH_ADDR_FMT"; "
1516 "outport = %s; "
1517 "next;",
1518 IP_ARGS(op->ip), ETH_ADDR_ARGS(op->mac), op->json_key);
1519
1520 /* The priority here is calculated to implement longest-prefix-match
1521 * routing. */
1522 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING,
1523 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1524 ds_destroy(&actions);
1525 free(match);
1526 }
1527
/* Builds the logical flow tables for every OVN logical router in
 * 'datapaths'/'ports', appending the flows to 'lflows'.  Logical-switch
 * datapaths and ports (those with no 'nbr' record) are skipped; they
 * are handled by build_lswitch_flows(). */
static void
build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Logical router ingress table 0: Admission control framework. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        /* Logical VLANs not supported.
         * Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
                      "vlan.present || eth.src[40]", "drop;");
    }

    /* Logical router ingress table 0: match (priority 50).  Accept only
     * packets addressed to the router port's own MAC or to a
     * multicast/broadcast MAC. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbr) {
            continue;
        }

        if (!lrport_is_enabled(op->nbr)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        char *match = xasprintf(
            "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
            ETH_ADDR_ARGS(op->mac), op->json_key);
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
                      match, "next;");
        free(match);
    }

    /* Logical router ingress table 1: IP Input. */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        /* L3 admission control: drop multicast and broadcast source,
         * localhost source or destination, and zero network source or
         * destination (priority 100). */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
                      "ip4.mcast || "
                      "ip4.src == 255.255.255.255 || "
                      "ip4.src == 127.0.0.0/8 || "
                      "ip4.dst == 127.0.0.0/8 || "
                      "ip4.src == 0.0.0.0/8 || "
                      "ip4.dst == 0.0.0.0/8",
                      "drop;");

        /* ARP reply handling.  Use ARP replies to populate the logical
         * router's ARP table. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
                      "put_arp(inport, arp.spa, arp.sha);");

        /* Drop Ethernet local broadcast.  By definition this traffic should
         * not be forwarded.*/
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
                      "eth.bcast", "drop;");

        /* Drop IP multicast. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
                      "ip4.mcast", "drop;");

        /* TTL discard.
         *
         * XXX Need to send ICMP time exceeded if !ip.later_frag. */
        char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
        free(match);

        /* Pass other traffic not already handled to the next table for
         * routing. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
    }

    /* Logical router ingress table 1, per-port flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbr) {
            continue;
        }

        /* L3 admission control: drop packets that originate from an IP
         * address owned by the router or a broadcast address known to
         * the router (priority 100). */
        char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
                                IP_ARGS(op->ip), IP_ARGS(op->bcast));
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
                      match, "drop;");
        free(match);

        /* ICMP echo reply.  These flows reply to ICMP echo requests
         * received for the router's IP address. */
        match = xasprintf(
            "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
            "icmp4.type == 8 && icmp4.code == 0",
            op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast));
        char *actions = xasprintf(
            "ip4.dst = ip4.src; "
            "ip4.src = "IP_FMT"; "
            "ip.ttl = 255; "
            "icmp4.type = 0; "
            "inport = \"\"; /* Allow sending out inport. */ "
            "next; ",
            IP_ARGS(op->ip));
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                      match, actions);
        free(match);
        free(actions);

        /* ARP reply.  These flows reply to ARP requests for the router's own
         * IP address. */
        match = xasprintf(
            "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
            op->json_key, IP_ARGS(op->ip));
        actions = xasprintf(
            "eth.dst = eth.src; "
            "eth.src = "ETH_ADDR_FMT"; "
            "arp.op = 2; /* ARP reply */ "
            "arp.tha = arp.sha; "
            "arp.sha = "ETH_ADDR_FMT"; "
            "arp.tpa = arp.spa; "
            "arp.spa = "IP_FMT"; "
            "outport = %s; "
            "inport = \"\"; /* Allow sending out inport. */ "
            "output;",
            ETH_ADDR_ARGS(op->mac),
            ETH_ADDR_ARGS(op->mac),
            IP_ARGS(op->ip),
            op->json_key);
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                      match, actions);
        free(match);
        free(actions);

        /* Drop IP traffic to this router. */
        match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
                      match, "drop;");
        free(match);
    }

    /* Logical router ingress table 2: IP Routing.
     *
     * A packet that arrives at this table is an IP packet that should be
     * routed to the address in ip4.dst.  This table sets outport to the
     * correct output port, eth.src to the output port's MAC address, and
     * reg0 to the next-hop IP address (leaving ip4.dst, the packet's final
     * destination, unchanged), and advances to the next table for ARP
     * resolution. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbr) {
            continue;
        }

        /* Directly attached network: the next hop is ip4.dst itself. */
        add_route(lflows, op, op->network, op->mask, 0);
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        /* Default route (0.0.0.0/0) via the router's configured
         * gateway, if any. */
        if (od->gateway && od->gateway_port) {
            add_route(lflows, od->gateway_port, 0, 0, od->gateway);
        }
    }
    /* XXX destination unreachable */

    /* Local router ingress table 3: ARP Resolution.
     *
     * Any packet that reaches this table is an IP packet whose next-hop IP
     * address is in reg0. (ip4.dst is the final destination.) This table
     * resolves the IP address in reg0 into an output port in outport and an
     * Ethernet address in eth.dst. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbr) {
            /* XXX ARP for neighboring router */
        } else if (op->od->n_router_ports) {
            /* 'op' is a logical switch port on a switch that is also
             * attached to one or more routers.  Install static ARP
             * resolution flows for this port's addresses on each peer
             * router whose network contains the address, so routed
             * traffic to known ports needs no dynamic ARP. */
            for (size_t i = 0; i < op->nbs->n_addresses; i++) {
                struct lport_addresses laddrs;
                if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
                                             false)) {
                    continue;
                }

                for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
                    ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
                    for (size_t j = 0; j < op->od->n_router_ports; j++) {
                        /* Get the Logical_Router_Port that the Logical_Port is
                         * connected to, as 'peer'. */
                        const char *peer_name = smap_get(
                            &op->od->router_ports[j]->nbs->options,
                            "router-port");
                        if (!peer_name) {
                            continue;
                        }

                        struct ovn_port *peer
                            = ovn_port_find(ports, peer_name);
                        if (!peer || !peer->nbr) {
                            continue;
                        }

                        /* Make sure that 'ip' is in 'peer''s network. */
                        if ((ip ^ peer->network) & peer->mask) {
                            continue;
                        }

                        char *match = xasprintf(
                            "outport == %s && reg0 == "IP_FMT,
                            peer->json_key, IP_ARGS(ip));
                        char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
                                                  "next;",
                                                  ETH_ADDR_ARGS(laddrs.ea));
                        ovn_lflow_add(lflows, peer->od,
                                      S_ROUTER_IN_ARP_RESOLVE,
                                      100, match, actions);
                        free(actions);
                        free(match);
                        break;
                    }
                }

                /* 'store_ipv6' was false, so only the IPv4 array was
                 * allocated. */
                free(laddrs.ipv4_addrs);
            }
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        /* Fall back to the dynamically learned ARP table (populated by
         * put_arp in table 1).  NOTE(review): table 4 below matches
         * eth.dst == 00:00:00:00:00:00 to detect resolution failure, so
         * get_arp is expected to leave eth.dst all-zeros on a miss —
         * confirm against the get_arp action implementation. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
                      "get_arp(outport, reg0); next;");
    }

    /* Local router ingress table 4: ARP request.
     *
     * In the common case where the Ethernet destination has been resolved,
     * this table outputs the packet (priority 0).  Otherwise — eth.dst is
     * still all-zeros — it composes and sends an ARP request
     * (priority 100).  reg1 was set by the routing table (see
     * add_route()) to the output port's own IP, which becomes the ARP
     * sender address. */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
                      "eth.dst == 00:00:00:00:00:00",
                      "arp { "
                      "eth.dst = ff:ff:ff:ff:ff:ff; "
                      "arp.spa = reg1; "
                      "arp.op = 1; " /* ARP request */
                      "output; "
                      "};");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
    }

    /* Logical router egress table 0: Delivery (priority 100).
     *
     * Priority 100 rules deliver packets to enabled logical ports. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbr) {
            continue;
        }

        if (!lrport_is_enabled(op->nbr)) {
            /* Drop packets to disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        char *match = xasprintf("outport == %s", op->json_key);
        ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
                      match, "output;");
        free(match);
    }
}
1812
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database.
 *
 * Works by computing the desired contents into local hmaps, then diffing
 * them against the rows already in the database: a row that matches a
 * desired entry is kept (and the entry is consumed), a stale row is
 * deleted, and whatever remains in the hmaps afterward is inserted as
 * new rows.  This avoids churning rows that have not changed. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        /* Delete rows whose datapath no longer exists. */
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        /* Reconstruct the stage from the row's pipeline and table id and
         * look for a matching desired flow. */
        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            /* Row is already up to date; consume the desired entry so it
             * is not re-inserted below. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }
    /* Anything still in 'lflows' is genuinely new: insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Record the human-readable stage name in external-ids for
         * debugging. */
        const struct smap ids = SMAP_CONST1(&ids, "stage-name",
                                            ovn_stage_to_str(lflow->stage));
        sbrec_logical_flow_set_external_ids(sbflow, &ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database, using the
     * same keep/delete/insert strategy as above. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Row still wanted: sync it and consume the desired entry. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
1900 \f
1901 static void
1902 ovnnb_db_run(struct northd_context *ctx)
1903 {
1904 if (!ctx->ovnsb_txn) {
1905 return;
1906 }
1907 VLOG_DBG("ovn-nb db contents may have changed.");
1908 struct hmap datapaths, ports;
1909 build_datapaths(ctx, &datapaths);
1910 build_ports(ctx, &datapaths, &ports);
1911 build_lflows(ctx, &datapaths, &ports);
1912
1913 struct ovn_datapath *dp, *next_dp;
1914 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1915 ovn_datapath_destroy(&datapaths, dp);
1916 }
1917 hmap_destroy(&datapaths);
1918
1919 struct ovn_port *port, *next_port;
1920 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1921 ovn_port_destroy(&ports, port);
1922 }
1923 hmap_destroy(&ports);
1924 }
1925
1926 /*
1927 * The only change we get notified about is if the 'chassis' column of the
1928 * 'Port_Binding' table changes. When this column is not empty, it means we
1929 * need to set the corresponding logical port as 'up' in the northbound DB.
1930 */
1931 static void
1932 ovnsb_db_run(struct northd_context *ctx)
1933 {
1934 if (!ctx->ovnnb_txn) {
1935 return;
1936 }
1937 struct hmap lports_hmap;
1938 const struct sbrec_port_binding *sb;
1939 const struct nbrec_logical_port *nb;
1940
1941 struct lport_hash_node {
1942 struct hmap_node node;
1943 const struct nbrec_logical_port *nb;
1944 } *hash_node, *hash_node_next;
1945
1946 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1947
1948 hmap_init(&lports_hmap);
1949
1950 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1951 hash_node = xzalloc(sizeof *hash_node);
1952 hash_node->nb = nb;
1953 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1954 }
1955
1956 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1957 nb = NULL;
1958 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1959 hash_string(sb->logical_port, 0),
1960 &lports_hmap) {
1961 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1962 nb = hash_node->nb;
1963 break;
1964 }
1965 }
1966
1967 if (!nb) {
1968 /* The logical port doesn't exist for this port binding. This can
1969 * happen under normal circumstances when ovn-northd hasn't gotten
1970 * around to pruning the Port_Binding yet. */
1971 continue;
1972 }
1973
1974 if (sb->chassis && (!nb->up || !*nb->up)) {
1975 bool up = true;
1976 nbrec_logical_port_set_up(nb, &up, 1);
1977 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1978 bool up = false;
1979 nbrec_logical_port_set_up(nb, &up, 1);
1980 }
1981 }
1982
1983 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1984 hmap_remove(&lports_hmap, &hash_node->node);
1985 free(hash_node);
1986 }
1987 hmap_destroy(&lports_hmap);
1988 }
1989 \f
1990
/* Cached result of default_db(), computed once per process. */
static char *default_db_;

/* Returns the default OVSDB connection target,
 * "unix:<rundir>/db.sock". */
static const char *
default_db(void)
{
    if (default_db_ == NULL) {
        default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
    }
    return default_db_;
}
2001
2002 static void
2003 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
2004 {
2005 enum {
2006 DAEMON_OPTION_ENUMS,
2007 VLOG_OPTION_ENUMS,
2008 };
2009 static const struct option long_options[] = {
2010 {"ovnsb-db", required_argument, NULL, 'd'},
2011 {"ovnnb-db", required_argument, NULL, 'D'},
2012 {"help", no_argument, NULL, 'h'},
2013 {"options", no_argument, NULL, 'o'},
2014 {"version", no_argument, NULL, 'V'},
2015 DAEMON_LONG_OPTIONS,
2016 VLOG_LONG_OPTIONS,
2017 STREAM_SSL_LONG_OPTIONS,
2018 {NULL, 0, NULL, 0},
2019 };
2020 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
2021
2022 for (;;) {
2023 int c;
2024
2025 c = getopt_long(argc, argv, short_options, long_options, NULL);
2026 if (c == -1) {
2027 break;
2028 }
2029
2030 switch (c) {
2031 DAEMON_OPTION_HANDLERS;
2032 VLOG_OPTION_HANDLERS;
2033 STREAM_SSL_OPTION_HANDLERS;
2034
2035 case 'd':
2036 ovnsb_db = optarg;
2037 break;
2038
2039 case 'D':
2040 ovnnb_db = optarg;
2041 break;
2042
2043 case 'h':
2044 usage();
2045 exit(EXIT_SUCCESS);
2046
2047 case 'o':
2048 ovs_cmdl_print_options(long_options);
2049 exit(EXIT_SUCCESS);
2050
2051 case 'V':
2052 ovs_print_version(0, 0);
2053 exit(EXIT_SUCCESS);
2054
2055 default:
2056 break;
2057 }
2058 }
2059
2060 if (!ovnsb_db) {
2061 ovnsb_db = default_db();
2062 }
2063
2064 if (!ovnnb_db) {
2065 ovnnb_db = default_db();
2066 }
2067
2068 free(short_options);
2069 }
2070
/* Registers 'column' for monitoring on 'idl' and suppresses change alerts
 * for it.  Used for SB columns that (per the registrations in main()) only
 * ovn-northd itself writes, so wakeups on our own changes are unwanted. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
2078
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    /* Process setup: ignore SIGPIPE, set the program name, do service
     * bookkeeping, and parse the command line before daemonizing. */
    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    /* Control channel: "ovs-appctl -t ovn-northd exit" invokes
     * ovn_northd_exit(), which sets 'exiting'.  'exiting' itself is
     * initialized below, before the unixctl server first runs. */
    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* Initialize the generated NB and SB schema helpers. */
    nbrec_init();
    sbrec_init();

    /* We want to detect all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));

    /* For the SB db, monitor only the tables/columns registered below. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    /* SB columns registered through add_column_noalert() do not trigger
     * change alerts; Port_Binding "chassis" is added with plain
     * ovsdb_idl_add_column() below, so changes to it do wake us up. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* Start a fresh transaction on each database (the txn pointers may
         * be NULL while an earlier commit is still in flight). */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        /* NB -> SB translation, then SB -> NB 'up' status feedback. */
        ovnnb_db_run(&ctx);
        ovnsb_db_run(&ctx);

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Make the poll_block() below return immediately so we fall
             * out of the loop without waiting for another event. */
            poll_immediate_wake();
        }
        /* Commit any changes made above and register wakeup conditions. */
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    /* Orderly shutdown. */
    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    free(default_db_);
    exit(res);
}
2184
2185 static void
2186 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2187 const char *argv[] OVS_UNUSED, void *exiting_)
2188 {
2189 bool *exiting = exiting_;
2190 *exiting = true;
2191
2192 unixctl_command_reply(conn, NULL);
2193 }