]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
ovn: Add 'na' action and lflow for ND
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "command-line.h"
22 #include "daemon.h"
23 #include "dirs.h"
24 #include "openvswitch/dynamic-string.h"
25 #include "fatal-signal.h"
26 #include "hash.h"
27 #include "hmap.h"
28 #include "json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "ovn/lib/ovn-util.h"
33 #include "packets.h"
34 #include "poll-loop.h"
35 #include "smap.h"
36 #include "stream.h"
37 #include "stream-ssl.h"
38 #include "unixctl.h"
39 #include "util.h"
40 #include "uuid.h"
41 #include "openvswitch/vlog.h"
42
43 VLOG_DEFINE_THIS_MODULE(ovn_northd);
44
45 static unixctl_cb_func ovn_northd_exit;
46
/* Context passed around by most of the functions in this file: the IDL
 * connections to the two OVN databases and the transactions (if any) that are
 * currently open on them. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* OVN Northbound DB connection. */
    struct ovsdb_idl *ovnsb_idl;        /* OVN Southbound DB connection. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Open northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Open southbound transaction. */
};
53
/* Connection targets for the northbound and southbound databases.  When not
 * set explicitly, the defaults come from default_nb_db()/default_sb_db()
 * (see usage()). */
static const char *ovnnb_db;
static const char *ovnsb_db;

static const char *default_nb_db(void);
static const char *default_sb_db(void);
59 \f
60 /* Pipeline stages. */
61
62 /* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline.  The numeric values
                                 * matter: this enum occupies bit 8 of an
                                 * "enum ovn_stage" (see OVN_STAGE_BUILD). */
};
67
68 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router.  The numeric values
                                 * matter: this enum occupies bit 9 of an
                                 * "enum ovn_stage" (see OVN_STAGE_BUILD). */
};
73
74 /* Returns an "enum ovn_stage" built from the arguments.
75 *
76 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
77 * functions can't be used in enums or switch cases.) */
/* Bit layout: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table. */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
80
81 /* A stage within an OVN logical switch or router.
82 *
83 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
84 * or router, whether the stage is part of the ingress or egress pipeline, and
85 * the table within that pipeline. The first three components are combined to
86 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
87 * S_ROUTER_OUT_DELIVERY. */
enum ovn_stage {
/* X-macro table of every pipeline stage:
 * PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME).
 * It is expanded here to define the enumerators, and again in
 * ovn_stage_to_str() to map each enumerator to its string name. */
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,  0, "ls_in_port_sec_l2") \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,  1, "ls_in_port_sec_ip") \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,  2, "ls_in_port_sec_nd") \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,      3, "ls_in_pre_acl")     \
    PIPELINE_STAGE(SWITCH, IN,  ACL,          4, "ls_in_acl")         \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,   5, "ls_in_arp_nd_rsp")  \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,      6, "ls_in_l2_lkup")     \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      0, "ls_out_pre_acl")    \
    PIPELINE_STAGE(SWITCH, OUT, ACL,          1, "ls_out_acl")        \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  2, "ls_out_port_sec_ip") \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  3, "ls_out_port_sec_l2") \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,    0, "lr_in_admission")   \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,     1, "lr_in_ip_input")    \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,       2, "lr_in_unsnat")      \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,         3, "lr_in_dnat")        \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,   4, "lr_in_ip_routing")  \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,  5, "lr_in_arp_resolve") \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,  6, "lr_in_arp_request") \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,         0, "lr_out_snat")       \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,     1, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
124
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
130
131 /* Returns an "enum ovn_stage" built from the arguments. */
132 static enum ovn_stage
133 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
134 uint8_t table)
135 {
136 return OVN_STAGE_BUILD(dp_type, pipeline, table);
137 }
138
139 /* Returns the pipeline to which 'stage' belongs. */
140 static enum ovn_pipeline
141 ovn_stage_get_pipeline(enum ovn_stage stage)
142 {
143 return (stage >> 8) & 1;
144 }
145
146 /* Returns the table to which 'stage' belongs. */
147 static uint8_t
148 ovn_stage_get_table(enum ovn_stage stage)
149 {
150 return stage & 0xff;
151 }
152
/* Returns a string name for 'stage' (e.g. "ls_in_port_sec_l2"), or
 * "<unknown>" if 'stage' is not one of the defined pipeline stages.  The
 * cases are generated from the same PIPELINE_STAGES X-macro table that
 * defines the enumerators, so the two can never get out of sync. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)     \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
165 \f
/* Prints a usage message for ovn-northd to stdout, then appends the standard
 * daemon, logging, and stream (database connection) option help. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
186 \f
/* One member of a set of in-use tunnel IDs (see add_tnlid(), tnlid_in_use(),
 * allocate_tnlid()). */
struct tnlid_node {
    struct hmap_node hmap_node; /* In the set's hmap, hashed on 'tnlid'. */
    uint32_t tnlid;             /* The tunnel ID itself. */
};
191
/* Frees every tnlid_node in 'tnlids' and destroys the hmap itself. */
static void
destroy_tnlids(struct hmap *tnlids)
{
    struct tnlid_node *node;
    HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
        free(node);
    }
    hmap_destroy(tnlids);
}
201
202 static void
203 add_tnlid(struct hmap *set, uint32_t tnlid)
204 {
205 struct tnlid_node *node = xmalloc(sizeof *node);
206 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
207 node->tnlid = tnlid;
208 }
209
/* Returns true if 'tnlid' was previously recorded in 'set' with add_tnlid(),
 * false otherwise. */
static bool
tnlid_in_use(const struct hmap *set, uint32_t tnlid)
{
    const struct tnlid_node *node;
    HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
        if (node->tnlid == tnlid) {
            return true;
        }
    }
    return false;
}
221
222 static uint32_t
223 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
224 uint32_t *hint)
225 {
226 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
227 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
228 if (!tnlid_in_use(set, tnlid)) {
229 add_tnlid(set, tnlid);
230 *hint = tnlid;
231 return tnlid;
232 }
233 }
234
235 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
236 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
237 return 0;
238 }
239 \f
/* One logical datapath (logical switch or logical router), joining the
 * northbound record to its southbound Datapath_Binding.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    /* Exactly one of 'nbs' and 'nbr' is nonnull for a northbound-backed
     * datapath; both are NULL for a stale southbound-only record. */
    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical router data (digested from nbr). */
    const struct ovn_port *gateway_port; /* Port that routes to 'gateway'. */
    ovs_be32 gateway;                    /* Default gateway IP, or 0. */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* Ports of type "router" on this switch. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node set). */
    uint32_t port_key_hint;     /* Where to start the next port key search. */

    bool has_unknown;           /* Any port with an "unknown" address? */
};
265
266 static struct ovn_datapath *
267 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
268 const struct nbrec_logical_switch *nbs,
269 const struct nbrec_logical_router *nbr,
270 const struct sbrec_datapath_binding *sb)
271 {
272 struct ovn_datapath *od = xzalloc(sizeof *od);
273 od->key = *key;
274 od->sb = sb;
275 od->nbs = nbs;
276 od->nbr = nbr;
277 hmap_init(&od->port_tnlids);
278 od->port_key_hint = 0;
279 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
280 return od;
281 }
282
283 static void
284 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
285 {
286 if (od) {
287 /* Don't remove od->list. It is used within build_datapaths() as a
288 * private list and once we've exited that function it is not safe to
289 * use it. */
290 hmap_remove(datapaths, &od->key_node);
291 destroy_tnlids(&od->port_tnlids);
292 free(od->router_ports);
293 free(od);
294 }
295 }
296
/* Finds and returns the ovn_datapath in 'datapaths' whose key is 'uuid', or
 * NULL if there is none. */
static struct ovn_datapath *
ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
{
    struct ovn_datapath *od;

    HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
        if (uuid_equals(uuid, &od->key)) {
            return od;
        }
    }
    return NULL;
}
309
310 static struct ovn_datapath *
311 ovn_datapath_from_sbrec(struct hmap *datapaths,
312 const struct sbrec_datapath_binding *sb)
313 {
314 struct uuid key;
315
316 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
317 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
318 return NULL;
319 }
320 return ovn_datapath_find(datapaths, &key);
321 }
322
323 static bool
324 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
325 {
326 return !lrouter->enabled || *lrouter->enabled;
327 }
328
/* Scans the southbound Datapath_Binding table and the northbound
 * Logical_Switch and Logical_Router tables, pairing up records that refer to
 * the same logical datapath.  On return:
 *
 *   - 'datapaths' contains one struct ovn_datapath per datapath seen.
 *   - 'sb_only' lists datapaths with only a southbound record (stale).
 *   - 'nb_only' lists datapaths with only a northbound record (new).
 *   - 'both' lists datapaths present in both databases.
 *
 * As a side effect, southbound rows that lack a usable external-ids key, or
 * that duplicate another row's key, are deleted. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Index every southbound row; assume "sb_only" until matched below. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            /* With no key there is no way to match this row to a northbound
             * record, so it is garbage: delete it. */
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            /* Two southbound rows claim the same northbound datapath; keep
             * only the first one seen. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Match up logical switches, moving matched datapaths to 'both'. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }

    /* Match up logical routers the same way; disabled routers are treated as
     * nonexistent. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen! */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        /* Digest the router's default gateway (a dotted-quad string in the
         * northbound DB) into binary form; 0 means "no gateway". */
        od->gateway = 0;
        if (nbr->default_gw) {
            ovs_be32 ip;
            if (!ip_parse(nbr->default_gw, &ip) || !ip) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
            } else {
                od->gateway = ip;
            }
        }

        /* Set the gateway port to NULL.  If there is a gateway, it will get
         * filled in as we go through the ports later. */
        od->gateway_port = NULL;
    }
}
427
/* Allocates and returns a fresh southbound datapath tunnel key.  Datapath
 * keys occupy a 24-bit space (1...(1<<24)-1); returns 0 when the space is
 * exhausted.  The static hint persists across calls so successive
 * allocations scan forward instead of restarting from 1. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
434
435 /* Updates the southbound Datapath_Binding table so that it contains the
436 * logical switches and routers specified by the northbound database.
437 *
438 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
439 * switch and router. */
440 static void
441 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
442 {
443 struct ovs_list sb_only, nb_only, both;
444
445 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
446
447 if (!ovs_list_is_empty(&nb_only)) {
448 /* First index the in-use datapath tunnel IDs. */
449 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
450 struct ovn_datapath *od;
451 LIST_FOR_EACH (od, list, &both) {
452 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
453 }
454
455 /* Add southbound record for each unmatched northbound record. */
456 LIST_FOR_EACH (od, list, &nb_only) {
457 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
458 if (!tunnel_key) {
459 break;
460 }
461
462 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
463
464 char uuid_s[UUID_LEN + 1];
465 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
466 const char *key = od->nbs ? "logical-switch" : "logical-router";
467 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
468 sbrec_datapath_binding_set_external_ids(od->sb, &id);
469
470 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
471 }
472 destroy_tnlids(&dp_tnlids);
473 }
474
475 /* Delete southbound records without northbound matches. */
476 struct ovn_datapath *od, *next;
477 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
478 ovs_list_remove(&od->list);
479 sbrec_datapath_binding_delete(od->sb);
480 ovn_datapath_destroy(datapaths, od);
481 }
482 }
483 \f
/* One logical port (switch port or router port), joining the northbound
 * record to its southbound Port_Binding. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    /* At most one of 'nbs' and 'nbr' is nonnull; both NULL means a stale
     * southbound-only record. */
    const struct nbrec_logical_switch_port *nbs; /* May be NULL. */
    const struct nbrec_logical_router_port *nbr; /* May be NULL. */
    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical router port data (digested in join_logical_ports()). */
    ovs_be32 ip, mask;          /* 192.168.10.123/24. */
    ovs_be32 network;           /* 192.168.10.0. */
    ovs_be32 bcast;             /* 192.168.10.255. */
    struct eth_addr mac;        /* Port's Ethernet address. */
    struct ovn_port *peer;      /* Connected port on the other datapath. */

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
504
505 static struct ovn_port *
506 ovn_port_create(struct hmap *ports, const char *key,
507 const struct nbrec_logical_switch_port *nbs,
508 const struct nbrec_logical_router_port *nbr,
509 const struct sbrec_port_binding *sb)
510 {
511 struct ovn_port *op = xzalloc(sizeof *op);
512
513 struct ds json_key = DS_EMPTY_INITIALIZER;
514 json_string_escape(key, &json_key);
515 op->json_key = ds_steal_cstr(&json_key);
516
517 op->key = xstrdup(key);
518 op->sb = sb;
519 op->nbs = nbs;
520 op->nbr = nbr;
521 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
522 return op;
523 }
524
525 static void
526 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
527 {
528 if (port) {
529 /* Don't remove port->list. It is used within build_ports() as a
530 * private list and once we've exited that function it is not safe to
531 * use it. */
532 hmap_remove(ports, &port->key_node);
533 free(port->json_key);
534 free(port->key);
535 free(port);
536 }
537 }
538
/* Finds and returns the ovn_port in 'ports' named 'name', or NULL if there
 * is none. */
static struct ovn_port *
ovn_port_find(struct hmap *ports, const char *name)
{
    struct ovn_port *op;

    HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
        if (!strcmp(op->key, name)) {
            return op;
        }
    }
    return NULL;
}
551
/* Allocates and returns a fresh port tunnel key within datapath 'od'.  Port
 * keys occupy a 15-bit space (1...(1<<15)-1); returns 0 when the space is
 * exhausted.  The per-datapath hint keeps successive allocations scanning
 * forward. */
static uint32_t
ovn_port_allocate_key(struct ovn_datapath *od)
{
    return allocate_tnlid(&od->port_tnlids, "port",
                          (1u << 15) - 1, &od->port_key_hint);
}
558
/* Pairs up southbound Port_Binding rows with northbound logical switch and
 * router ports.  On return:
 *
 *   - 'ports' contains one struct ovn_port per logical port.
 *   - 'sb_only' lists ports with only a southbound record (stale).
 *   - 'nb_only' lists ports with only a northbound record (new).
 *   - 'both' lists ports present in both databases.
 *
 * Also digests each router port's MAC and CIDR, picks each router's gateway
 * port (longest-prefix match against od->gateway), and connects peer ports
 * (router <-> switch "router"-type ports, router <-> router). */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct ovs_list *sb_only, struct ovs_list *nb_only,
                   struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Index every southbound row; assume "sb_only" until matched below. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: match up its ports by name. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbs = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbs->name);
                if (op) {
                    if (op->nbs || op->nbr) {
                        /* Name already claimed by another northbound port. */
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbs->name);
                        continue;
                    }
                    op->nbs = nbs;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);
                } else {
                    op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->od = od;
            }
        } else {
            /* Logical router: validate each port's MAC and network before
             * creating anything for it. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbr
                    = od->nbr->ports[i];

                struct eth_addr mac;
                if (!eth_addr_from_string(nbr->mac, &mac)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
                    continue;
                }

                /* The network must be a proper CIDR prefix, not a /32 host
                 * address and not a noncontiguous mask. */
                ovs_be32 ip, mask;
                char *error = ip_parse_masked(nbr->network, &ip, &mask);
                if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
                    free(error);
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbr->name);
                if (op) {
                    if (op->nbs || op->nbr) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbr->name);
                        continue;
                    }
                    op->nbr = nbr;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);
                } else {
                    op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Digest the CIDR into the derived address fields. */
                op->ip = ip;
                op->mask = mask;
                op->network = ip & mask;
                op->bcast = ip | ~mask;
                op->mac = mac;

                op->od = od;

                /* If 'od' has a gateway and 'op' routes to it... */
                if (od->gateway && !((op->network ^ od->gateway) & op->mask)) {
                    /* ...and if 'op' is a longer match than the current
                     * choice... */
                    const struct ovn_port *gw = od->gateway_port;
                    int len = gw ? ip_count_cidr_bits(gw->mask) : 0;
                    if (ip_count_cidr_bits(op->mask) > len) {
                        /* ...then it's the default gateway port. */
                        od->gateway_port = op;
                    }
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbs && !strcmp(op->nbs->type, "router")) {
            /* Switch side names its router peer in options:router-port. */
            const char *peer_name = smap_get(&op->nbs->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbr) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            /* Track this switch's router-attached ports for flow building. */
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;
        } else if (op->nbr && op->nbr->peer) {
            /* Router-to-router connection named directly in the NB schema. */
            op->peer = ovn_port_find(ports, op->nbr->peer);
        }
    }
}
690
/* Rewrites the southbound Port_Binding row op->sb (which must be nonnull)
 * from the northbound data digested into 'op': datapath reference, port
 * type, options, and (for switch ports) parent/tag/MAC columns. */
static void
ovn_port_update_sbrec(const struct ovn_port *op)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbr) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "gateway". */
        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
        if (chassis) {
            sbrec_port_binding_set_type(op->sb, "gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        /* "<error>" marks a router port whose peer was never resolved. */
        const char *peer = op->peer ? op->peer->key : "<error>";
        struct smap new;
        smap_init(&new);
        smap_add(&new, "peer", peer);
        if (chassis) {
            smap_add(&new, "gateway-chassis", chassis);
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Router ports never have parents, tags, or MAC column entries;
         * clear anything stale. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbs->type, "router")) {
            /* Ordinary switch port: pass type and options through as-is. */
            sbrec_port_binding_set_type(op->sb, op->nbs->type);
            sbrec_port_binding_set_options(op->sb, &op->nbs->options);
        } else {
            /* Switch port of type "router": look through to the peer's
             * router to decide between "gateway" and "patch". */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get(&op->nbs->options,
                                               "router-port");
            if (!router_port) {
                router_port = "<error>";
            }
            struct smap new;
            smap_init(&new);
            smap_add(&new, "peer", router_port);
            if (chassis) {
                smap_add(&new, "gateway-chassis", chassis);
            }
            sbrec_port_binding_set_options(op->sb, &new);
            smap_destroy(&new);
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
                                   op->nbs->n_addresses);
    }
}
756
/* Updates the southbound Port_Binding table so that it contains the logical
 * switch ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx, struct hmap *datapaths,
            struct hmap *ports)
{
    struct ovs_list sb_only, nb_only, both;

    join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);

    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data.  Also index the in-use tunnel_keys. */
    struct ovn_port *op, *next;
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        ovn_port_update_sbrec(op);

        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record.  (Port
     * keys are 15 bits wide, so uint16_t is large enough here; 0 means the
     * datapath's key space is exhausted.) */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(op);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        ovs_list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
}
804 \f
/* Tunnel keys reserved for multicast groups (ordinary port keys are below
 * this range). */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with its fixed tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Flood group: all ports on a datapath. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Unknown group: ports with an "unknown" address, for unmatched unicast. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
818
819 static bool
820 multicast_group_equal(const struct multicast_group *a,
821 const struct multicast_group *b)
822 {
823 return !strcmp(a->name, b->name) && a->key == b->key;
824 }
825
/* Multicast group entry: the set of ports on one datapath belonging to one
 * multicast group. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    struct ovn_port **ports;    /* Members; grown on demand. */
    size_t n_ports, allocated_ports;
};
835
/* Returns a hash value for the (datapath, group) pair, used to index
 * struct ovn_multicast in an hmap. */
static uint32_t
ovn_multicast_hash(const struct ovn_datapath *datapath,
                   const struct multicast_group *group)
{
    return hash_pointer(datapath, group->key);
}
842
/* Finds and returns the ovn_multicast in 'mcgroups' for the given
 * (datapath, group) pair, or NULL if there is none. */
static struct ovn_multicast *
ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
                   const struct multicast_group *group)
{
    struct ovn_multicast *mc;

    HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
                             ovn_multicast_hash(datapath, group), mcgroups) {
        if (mc->datapath == datapath
            && multicast_group_equal(mc->group, group)) {
            return mc;
        }
    }
    return NULL;
}
858
859 static void
860 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
861 struct ovn_port *port)
862 {
863 struct ovn_datapath *od = port->od;
864 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
865 if (!mc) {
866 mc = xmalloc(sizeof *mc);
867 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
868 mc->datapath = od;
869 mc->group = group;
870 mc->n_ports = 0;
871 mc->allocated_ports = 4;
872 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
873 }
874 if (mc->n_ports >= mc->allocated_ports) {
875 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
876 sizeof *mc->ports);
877 }
878 mc->ports[mc->n_ports++] = port;
879 }
880
881 static void
882 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
883 {
884 if (mc) {
885 hmap_remove(mcgroups, &mc->hmap_node);
886 free(mc->ports);
887 free(mc);
888 }
889 }
890
/* Writes the member ports of 'mc' into the southbound Multicast_Group row
 * 'sb', translating each ovn_port to its Port_Binding row. */
static void
ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
                           const struct sbrec_multicast_group *sb)
{
    /* Temporary array of the Port_Binding pointers, in member order. */
    struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
    for (size_t i = 0; i < mc->n_ports; i++) {
        ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
    }
    sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
    free(ports);
}
902 \f
/* Logical flow generation.
 *
 * This code generates the Logical_Flow table in the southbound database, as a
 * function of most of the northbound database.
 */

/* One logical flow: a (datapath, stage, priority, match, actions) tuple
 * destined for the southbound Logical_Flow table. */
struct ovn_lflow {
    struct hmap_node hmap_node; /* In the flow table; see ovn_lflow_hash(). */

    struct ovn_datapath *od;    /* Datapath the flow belongs to. */
    enum ovn_stage stage;       /* Pipeline stage (determines table). */
    uint16_t priority;          /* Flow priority. */
    char *match;                /* Match expression; owned by the flow. */
    char *actions;              /* Action string; owned by the flow. */
};
918
/* Returns a hash over all the fields that ovn_lflow_equal() compares.  The
 * chaining order (datapath key, then stage/priority, then match, then
 * actions) is fixed: changing it would change every hash value. */
static size_t
ovn_lflow_hash(const struct ovn_lflow *lflow)
{
    size_t hash = uuid_hash(&lflow->od->key);
    hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
    hash = hash_string(lflow->match, hash);
    return hash_string(lflow->actions, hash);
}
927
928 static bool
929 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
930 {
931 return (a->od == b->od
932 && a->stage == b->stage
933 && a->priority == b->priority
934 && !strcmp(a->match, b->match)
935 && !strcmp(a->actions, b->actions));
936 }
937
938 static void
939 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
940 enum ovn_stage stage, uint16_t priority,
941 char *match, char *actions)
942 {
943 lflow->od = od;
944 lflow->stage = stage;
945 lflow->priority = priority;
946 lflow->match = match;
947 lflow->actions = actions;
948 }
949
950 /* Adds a row with the specified contents to the Logical_Flow table. */
951 static void
952 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
953 enum ovn_stage stage, uint16_t priority,
954 const char *match, const char *actions)
955 {
956 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
957 ovn_lflow_init(lflow, od, stage, priority,
958 xstrdup(match), xstrdup(actions));
959 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
960 }
961
/* Finds and returns the flow in 'lflows' exactly matching the given fields,
 * or NULL if there is none.  The stack-allocated 'target' borrows 'match'
 * and 'actions' (CONST_CAST, never freed), so it must not be passed to
 * ovn_lflow_destroy(). */
static struct ovn_lflow *
ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
               enum ovn_stage stage, uint16_t priority,
               const char *match, const char *actions)
{
    struct ovn_lflow target;
    ovn_lflow_init(&target, od, stage, priority,
                   CONST_CAST(char *, match), CONST_CAST(char *, actions));

    struct ovn_lflow *lflow;
    HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
                             lflows) {
        if (ovn_lflow_equal(lflow, &target)) {
            return lflow;
        }
    }
    return NULL;
}
980
981 static void
982 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
983 {
984 if (lflow) {
985 hmap_remove(lflows, &lflow->hmap_node);
986 free(lflow->match);
987 free(lflow->actions);
988 free(lflow);
989 }
990 }
991
/* Appends port security constraints on L2 address field 'eth_addr_field'
 * (e.g. "eth.src" or "eth.dst") to 'match'.  'port_security', with
 * 'n_port_security' elements, is the collection of port_security constraints
 * from an OVN_NB Logical_Switch_Port row.
 *
 * Entries that do not parse as Ethernet addresses are skipped.  If none
 * parse, 'match' is rolled back to its original length, so no constraint at
 * all is appended. */
static void
build_port_security_l2(const char *eth_addr_field,
                       char **port_security, size_t n_port_security,
                       struct ds *match)
{
    /* Remember the current length so the whole clause can be undone below. */
    size_t base_len = match->length;
    ds_put_format(match, " && %s == {", eth_addr_field);

    size_t n = 0;               /* Number of addresses actually emitted. */
    for (size_t i = 0; i < n_port_security; i++) {
        struct eth_addr ea;

        if (eth_addr_from_string(port_security[i], &ea)) {
            ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
            ds_put_char(match, ' ');
            n++;
        }
    }
    ds_chomp(match, ' ');       /* Drop the trailing space separator. */
    ds_put_cstr(match, "}");

    if (!n) {
        /* Nothing valid was added: truncate back to the original length. */
        match->length = base_len;
    }
}
1021
/* Appends to 'match' a clause that restricts IPv6 neighbor discovery: nd.sll
 * and nd.tll may only be all-zeros or 'ea', and (when 'n_ipv6_addrs' > 0)
 * nd.target may only be the EUI-64 link-local address derived from 'ea' or
 * one of the 'n_ipv6_addrs' addresses in 'ipv6_addrs'.
 *
 * Note: the parentheses opened here are deliberately left unbalanced until
 * the closing "))" / ")))" below, depending on whether the nd.target clause
 * is appended. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No addresses to restrict nd.target to: just close the clause. */
        ds_put_cstr(match, "))");
        return;
    }

    /* The link-local address derived from 'ea' is always allowed as a
     * target, in addition to the configured addresses. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
1052
1053 static void
1054 build_port_security_ipv6_flow(
1055 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1056 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1057 {
1058 char ip6_str[INET6_ADDRSTRLEN + 1];
1059
1060 ds_put_format(match, " && %s == {",
1061 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1062
1063 /* Allow link-local address. */
1064 struct in6_addr lla;
1065 in6_generate_lla(ea, &lla);
1066 ipv6_string_mapped(ip6_str, &lla);
1067 ds_put_format(match, "%s, ", ip6_str);
1068
1069 /* Allow ip6.dst=ff00::/8 for multicast packets */
1070 if (pipeline == P_OUT) {
1071 ds_put_cstr(match, "ff00::/8, ");
1072 }
1073 for(int i = 0; i < n_ipv6_addrs; i++) {
1074 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1075 ds_put_format(match, "%s, ", ip6_str);
1076 }
1077 /* Replace ", " by "}". */
1078 ds_chomp(match, ' ');
1079 ds_chomp(match, ',');
1080 ds_put_cstr(match, "}");
1081 }
1082
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port_security entry of the logical port, the following
 * logical flows are added:
 *   - If the port security has no IP (neither IPv4 nor IPv6) or
 *     if it has IPv4 address(es):
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.sha fields.  If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.spa field.
 *
 *   - If the port security has no IP (neither IPv4 nor IPv6) or
 *     if it has IPv6 address(es):
 *      - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *        in the eth.src and nd.sll/nd.tll fields.  If the port security
 *        has IPv6 addresses, allow known IPv6 addresses in the nd.target
 *        field for IPv6 Neighbor Advertisement packets.
 *
 *   - Priority 80 flow to drop all other ARP and IPv6 ND packets
 *     from this inport.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    for (size_t i = 0; i < op->nbs->n_port_security; i++) {
        struct lport_addresses ps;
        if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_INFO_RL(&rl, "invalid syntax '%s' in port security. No MAC"
                         " address found", op->nbs->port_security[i]);
            continue;
        }

        /* With no L3 addresses configured, both the ARP and the ND flows
         * are emitted without address constraints. */
        bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
        struct ds match = DS_EMPTY_INITIALIZER;

        if (ps.n_ipv4_addrs || no_ip) {
            ds_put_format(
                &match, "inport == %s && eth.src == "ETH_ADDR_FMT" && arp.sha == "
                ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
                ETH_ADDR_ARGS(ps.ea));

            if (ps.n_ipv4_addrs) {
                ds_put_cstr(&match, " && (");
                /* NOTE: this inner 'i' intentionally shadows the outer
                 * loop variable. */
                for (size_t i = 0; i < ps.n_ipv4_addrs; i++) {
                    ds_put_cstr(&match, "arp.spa == ");
                    ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps.ipv4_addrs[i].addr & ~mask) {
                        ds_put_format(&match, IP_FMT,
                                      IP_ARGS(ps.ipv4_addrs[i].addr));
                    } else {
                        ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
                                         &match);
                    }
                    ds_put_cstr(&match, " || ");
                }
                /* Strip the trailing " || " separator. */
                ds_chomp(&match, ' ');
                ds_chomp(&match, '|');
                ds_chomp(&match, '|');
                ds_put_cstr(&match, ")");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps.n_ipv6_addrs || no_ip) {
            /* 'match' was either destroyed above or never written to, so
             * re-initializing it here is safe. */
            ds_init(&match);
            ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT,
                          op->json_key, ETH_ADDR_ARGS(ps.ea));
            build_port_security_ipv6_nd_flow(&match, ps.ea, ps.ipv6_addrs,
                                             ps.n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }
        free(ps.ipv4_addrs);
        free(ps.ipv6_addrs);
    }

    /* Catch-all: drop ARP/ND traffic from this port that did not match a
     * priority-90 allow flow above. */
    char *match = xasprintf("inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  match, "drop;");
    free(match);
}
1172
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port_security entry of the logical port, the following
 * logical flows are added:
 *   - If the port security has IPv4 addresses:
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *       (plus DHCP discovery from 0.0.0.0 on ingress, and broadcast /
 *       multicast destinations on egress).
 *
 *   - If the port security has IPv6 addresses:
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *       (plus duplicate-address-detection traffic from :: on ingress).
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both:
 *     - Priority 80 flow to drop all other IPv4 and IPv6 traffic.
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* Select the port/address fields for the pipeline direction. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->nbs->n_port_security; i++) {
        struct lport_addresses ps;
        if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
            continue;
        }

        /* MAC-only entry: no IP constraints to emit for this entry.
         * NOTE(review): ps is not freed on this path — presumably the
         * address arrays are NULL when no IPs were parsed; confirm against
         * extract_lsp_addresses(). */
        if (!(ps.n_ipv4_addrs || ps.n_ipv6_addrs)) {
            continue;
        }

        if (ps.n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == "ETH_ADDR_FMT
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67", op->json_key,
                              ETH_ADDR_ARGS(ps.ea));
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT
                              " && ip4.src == {", op->json_key,
                              ETH_ADDR_ARGS(ps.ea));
            } else {
                /* On egress, the local broadcast and multicast ranges are
                 * always acceptable destinations. */
                ds_put_format(&match, "outport == %s && eth.dst == "ETH_ADDR_FMT
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ETH_ADDR_ARGS(ps.ea));
            }

            for (int i = 0; i < ps.n_ipv4_addrs; i++) {
                ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address. If zero, the host is allowed to use any
                 * address in the subnet.
                 * */
                if (ps.ipv4_addrs[i].addr & ~mask) {
                    ds_put_format(&match, IP_FMT,
                                  IP_ARGS(ps.ipv4_addrs[i].addr));
                    if (pipeline == P_OUT && ps.ipv4_addrs[i].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet.
                         */
                        ds_put_format(&match, ", "IP_FMT,
                                      IP_ARGS(ps.ipv4_addrs[i].addr | ~mask));
                    }
                } else {
                    /* host portion is zero */
                    ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
                                     &match);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
            free(ps.ipv4_addrs);
        }

        if (ps.n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == "ETH_ADDR_FMT
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ETH_ADDR_ARGS(ps.ea));
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            ds_put_format(&match, "%s == %s && %s == "ETH_ADDR_FMT"",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst",
                          ETH_ADDR_ARGS(ps.ea));
            build_port_security_ipv6_flow(pipeline, &match, ps.ea,
                                          ps.ipv6_addrs, ps.n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
            free(ps.ipv6_addrs);
        }

        /* Catch-all for this entry: drop any other IP traffic with this
         * MAC on this port. */
        char *match = xasprintf(
            "%s == %s && %s == "ETH_ADDR_FMT" && ip", port_direction,
            op->json_key, pipeline == P_IN ? "eth.src" : "eth.dst",
            ETH_ADDR_ARGS(ps.ea));
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }
}
1305
1306 static bool
1307 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
1308 {
1309 return !lsp->enabled || *lsp->enabled;
1310 }
1311
1312 static bool
1313 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
1314 {
1315 return !lsp->up || *lsp->up;
1316 }
1317
1318 static bool
1319 has_stateful_acl(struct ovn_datapath *od)
1320 {
1321 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1322 struct nbrec_acl *acl = od->nbs->acls[i];
1323 if (!strcmp(acl->action, "allow-related")) {
1324 return true;
1325 }
1326 }
1327
1328 return false;
1329 }
1330
/* Builds the pre-ACL and ACL stages (ingress and egress) for logical switch
 * datapath 'od', appending flows to 'lflows'.  'ports' is consulted to find
 * router-type ports on 'od', which must bypass conntrack. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
{
    bool has_stateful = has_stateful_acl(od);
    struct ovn_port *op;

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    if (has_stateful) {
        HMAP_FOR_EACH (op, key_node, ports) {
            if (op->od == od && !strcmp(op->nbs->type, "router")) {
                /* Can't use ct() for router ports.  Consider the
                 * following configuration: lp1(10.0.0.2) on
                 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
                 * ping from lp1 to lp2, First, the response will go
                 * through ct() with a zone for lp2 in the ls2 ingress
                 * pipeline on hostB.  That ct zone knows about this
                 * connection.  Next, it goes through ct() with the zone
                 * for the router port in the egress pipeline of ls2 on
                 * hostB.  This zone does not know about the connection,
                 * as the icmp request went through the logical router
                 * on hostA, not hostB.  This would only work with
                 * distributed conntrack state across all chassis. */
                struct ds match_in = DS_EMPTY_INITIALIZER;
                struct ds match_out = DS_EMPTY_INITIALIZER;

                ds_put_format(&match_in, "ip && inport == %s", op->json_key);
                ds_put_format(&match_out, "ip && outport == %s", op->json_key);
                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                              ds_cstr(&match_in), "next;");
                ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                              ds_cstr(&match_out), "next;");

                ds_destroy(&match_in);
                ds_destroy(&match_out);
            }
        }

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");

        /* Ingress and Egress Pre-ACL Table (Priority 110).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");

        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiator's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
                      "ct_commit; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
                      "ct_commit; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  This is
         * enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv", "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv", "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always allow traffic that is established to a committed
         * conntrack entry.  This is enforced at a higher priority than
         * ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always allow traffic that is related to an existing conntrack
         * entry.  This is enforced at a higher priority than ACLs can
         * be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port.  */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiator's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            const char *actions = has_stateful ? "ct_commit; next;" : "next;";
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, actions);
        } else if (!strcmp(acl->action, "allow-related")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* Commit the connection tracking entry, which allows all
             * other traffic related to this entry to flow due to the
             * 65535 priority flow defined earlier. */
            ds_put_format(&match, "ct.new && (%s)", acl->match);
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          ds_cstr(&match), "ct_commit; next;");

            ds_destroy(&match);
        } else if (!strcmp(acl->action, "drop")) {
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, "drop;");
        } else if (!strcmp(acl->action, "reject")) {
            /* xxx Need to support "reject". */
            VLOG_INFO("reject is not a supported action");
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, "drop;");
        }
    }
}
1494
/* Populates 'lflows' (and 'mcgroups') with the complete set of logical
 * flows for every logical-switch datapath in 'datapaths' and every
 * logical-switch port in 'ports'. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 and 4.  Egress tables 0 and 1. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            /* Not a logical switch datapath. */
            continue;
        }

        build_acls(od, lflows, ports);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     *  (priority 50).
     *  Ingress table 1: Ingress port security - IP (priority 90 and 80)
     *  Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        if (!lsp_is_enabled(op->nbs)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        struct ds match = DS_EMPTY_INITIALIZER;
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2(
            "eth.src", op->nbs->port_security, op->nbs->n_port_security,
            &match);
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), "next;");
        ds_destroy(&match);

        /* IP and ND port-security stages are only constrained when
         * port_security is actually configured on the port. */
        if (op->nbs->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table: ARP/ND responder, skip requests coming from localnet
     * ports (priority 100); the physical network answers those itself. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        if (!strcmp(op->nbs->type, "localnet")) {
            char *match = xasprintf("inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          match, "next;");
            free(match);
        }
    }

    /* Ingress table 5: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router
         */
        if (!lsp_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
            continue;
        }

        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct lport_addresses laddrs;
            if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
                                       true)) {
                continue;
            }
            /* One ARP-reply flow per known IPv4 address. */
            for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
                char *match = xasprintf(
                    "arp.tpa == "IP_FMT" && arp.op == 1",
                    IP_ARGS(laddrs.ipv4_addrs[j].addr));
                char *actions = xasprintf(
                    "eth.dst = eth.src; "
                    "eth.src = "ETH_ADDR_FMT"; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = "ETH_ADDR_FMT"; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = "IP_FMT"; "
                    "outport = inport; "
                    "inport = \"\"; /* Allow sending out inport. */ "
                    "output;",
                    ETH_ADDR_ARGS(laddrs.ea),
                    ETH_ADDR_ARGS(laddrs.ea),
                    IP_ARGS(laddrs.ipv4_addrs[j].addr));
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              match, actions);
                free(match);
                free(actions);
            }

            /* One Neighbor-Advertisement flow covering all known IPv6
             * addresses (icmp6 type 135 is Neighbor Solicitation). */
            if (laddrs.n_ipv6_addrs > 0) {
                char ip6_str[INET6_ADDRSTRLEN + 1];
                struct ds match = DS_EMPTY_INITIALIZER;
                ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
                if (laddrs.n_ipv6_addrs == 1) {
                    ipv6_string_mapped(ip6_str,
                                       &(laddrs.ipv6_addrs[0].addr));
                    ds_put_format(&match, "nd.target == %s", ip6_str);
                } else {
                    ds_put_cstr(&match, "(");
                    for (size_t j = 0; j < laddrs.n_ipv6_addrs; j++) {
                        ipv6_string_mapped(ip6_str,
                                           &(laddrs.ipv6_addrs[j].addr));
                        ds_put_format(&match, "nd.target == %s || ", ip6_str);
                    }
                    /* Strip the trailing " || " separator. */
                    ds_chomp(&match, ' ');
                    ds_chomp(&match, '|');
                    ds_chomp(&match, '|');
                    ds_chomp(&match, ' ');
                    ds_put_cstr(&match, ")");
                }
                char *actions = xasprintf(
                    "na { eth.src = "ETH_ADDR_FMT"; "
                    "nd.tll = "ETH_ADDR_FMT"; "
                    "outport = inport; "
                    "inport = \"\"; /* Allow sending out inport. */ "
                    "output; };",
                    ETH_ADDR_ARGS(laddrs.ea),
                    ETH_ADDR_ARGS(laddrs.ea));

                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), actions);

                ds_destroy(&match);
            }

            free(laddrs.ipv4_addrs);
            free(laddrs.ipv6_addrs);
        }
    }

    /* Ingress table 5: ARP/ND responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Ingress table 6: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        if (lsp_is_enabled(op->nbs)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 6: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct eth_addr mac;

            if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
                struct ds match, actions;

                ds_init(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_init(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
                ds_destroy(&actions);
                ds_destroy(&match);
            } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
                /* "unknown" routes unmatched MACs to this port via the
                 * MC_UNKNOWN group (see priority-0 flow below). */
                if (lsp_is_enabled(op->nbs)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbs->name, op->nbs->addresses[i]);
            }
        }
    }

    /* Ingress table 6: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress table 2: Egress port security - IP (priority 0)
     * port security L2 - multicast/broadcast (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 2: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 3: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbs) {
            continue;
        }

        struct ds match = DS_EMPTY_INITIALIZER;
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbs)) {
            build_port_security_l2("eth.dst", op->nbs->port_security,
                                   op->nbs->n_port_security, &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        ds_destroy(&match);

        if (op->nbs->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }
}
1802
1803 static bool
1804 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1805 {
1806 return !lrport->enabled || *lrport->enabled;
1807 }
1808
1809 static void
1810 add_route(struct hmap *lflows, const struct ovn_port *op,
1811 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1812 {
1813 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1814 IP_ARGS(network), IP_ARGS(mask));
1815
1816 struct ds actions = DS_EMPTY_INITIALIZER;
1817 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1818 if (gateway) {
1819 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1820 } else {
1821 ds_put_cstr(&actions, "ip4.dst");
1822 }
1823 ds_put_format(&actions,
1824 "; "
1825 "reg1 = "IP_FMT"; "
1826 "eth.src = "ETH_ADDR_FMT"; "
1827 "outport = %s; "
1828 "next;",
1829 IP_ARGS(op->ip), ETH_ADDR_ARGS(op->mac), op->json_key);
1830
1831 /* The priority here is calculated to implement longest-prefix-match
1832 * routing. */
1833 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING,
1834 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1835 ds_destroy(&actions);
1836 free(match);
1837 }
1838
/* Validates the static 'route' configured on logical router datapath 'od'
 * and, if it is well-formed, installs the corresponding routing flow via
 * add_route().  'ports' maps port names to ovn_port structures; it is used
 * to resolve the route's output port (explicit or inferred from the next
 * hop).  Invalid routes are logged (rate-limited) and skipped. */
static void
build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
                        struct hmap *ports,
                        const struct nbrec_logical_router_static_route *route)
{
    ovs_be32 prefix, next_hop, mask;

    /* Verify that next hop is an IP address with 32 bits mask. */
    char *error = ip_parse_masked(route->nexthop, &next_hop, &mask);
    if (error || mask != OVS_BE32_MAX) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
        /* free(NULL) is a no-op when ip_parse_masked() succeeded but the
         * mask check failed. */
        free(error);
        return;
    }

    /* Verify that ip prefix is a valid CIDR address. */
    error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
    if (error || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'network' in static routes %s",
                     route->ip_prefix);
        free(error);
        return;
    }

    /* Find the outgoing port. */
    struct ovn_port *out_port = NULL;
    if (route->output_port) {
        /* An output port was named explicitly; it must exist. */
        out_port = ovn_port_find(ports, route->output_port);
        if (!out_port) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
                         route->output_port, route->ip_prefix);
            return;
        }
    } else {
        /* output_port is not specified, find the
         * router port matching the next hop. */
        int i;
        for (i = 0; i < od->nbr->n_ports; i++) {
            struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
            out_port = ovn_port_find(ports, lrp->name);
            if (!out_port) {
                /* This should not happen. */
                continue;
            }

            if (out_port->network
                && !((out_port->network ^ next_hop) & out_port->mask)) {
                /* There should be only 1 interface that matches the next hop.
                 * Otherwise, it's a configuration error, because subnets of
                 * router's interfaces should NOT overlap. */
                break;
            }
        }
        if (i == od->nbr->n_ports) {
            /* There is no matched out port. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
                         route->ip_prefix, route->nexthop);
            return;
        }
    }

    add_route(lflows, out_port, prefix, mask, next_hop);
}
1906
1907 static void
1908 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1909 struct hmap *lflows)
1910 {
1911 /* This flow table structure is documented in ovn-northd(8), so please
1912 * update ovn-northd.8.xml if you change anything. */
1913
1914 /* Logical router ingress table 0: Admission control framework. */
1915 struct ovn_datapath *od;
1916 HMAP_FOR_EACH (od, key_node, datapaths) {
1917 if (!od->nbr) {
1918 continue;
1919 }
1920
1921 /* Logical VLANs not supported.
1922 * Broadcast/multicast source address is invalid. */
1923 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1924 "vlan.present || eth.src[40]", "drop;");
1925 }
1926
1927 /* Logical router ingress table 0: match (priority 50). */
1928 struct ovn_port *op;
1929 HMAP_FOR_EACH (op, key_node, ports) {
1930 if (!op->nbr) {
1931 continue;
1932 }
1933
1934 if (!lrport_is_enabled(op->nbr)) {
1935 /* Drop packets from disabled logical ports (since logical flow
1936 * tables are default-drop). */
1937 continue;
1938 }
1939
1940 char *match = xasprintf(
1941 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1942 ETH_ADDR_ARGS(op->mac), op->json_key);
1943 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1944 match, "next;");
1945 free(match);
1946 }
1947
1948 /* Logical router ingress table 1: IP Input. */
1949 HMAP_FOR_EACH (od, key_node, datapaths) {
1950 if (!od->nbr) {
1951 continue;
1952 }
1953
1954 /* L3 admission control: drop multicast and broadcast source, localhost
1955 * source or destination, and zero network source or destination
1956 * (priority 100). */
1957 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1958 "ip4.mcast || "
1959 "ip4.src == 255.255.255.255 || "
1960 "ip4.src == 127.0.0.0/8 || "
1961 "ip4.dst == 127.0.0.0/8 || "
1962 "ip4.src == 0.0.0.0/8 || "
1963 "ip4.dst == 0.0.0.0/8",
1964 "drop;");
1965
1966 /* ARP reply handling. Use ARP replies to populate the logical
1967 * router's ARP table. */
1968 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
1969 "put_arp(inport, arp.spa, arp.sha);");
1970
1971 /* Drop Ethernet local broadcast. By definition this traffic should
1972 * not be forwarded.*/
1973 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1974 "eth.bcast", "drop;");
1975
1976 /* Drop IP multicast. */
1977 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1978 "ip4.mcast", "drop;");
1979
1980 /* TTL discard.
1981 *
1982 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1983 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1984 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1985 free(match);
1986
1987 /* Pass other traffic not already handled to the next table for
1988 * routing. */
1989 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1990 }
1991
1992 HMAP_FOR_EACH (op, key_node, ports) {
1993 if (!op->nbr) {
1994 continue;
1995 }
1996
1997 /* L3 admission control: drop packets that originate from an IP address
1998 * owned by the router or a broadcast address known to the router
1999 * (priority 100). */
2000 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
2001 IP_ARGS(op->ip), IP_ARGS(op->bcast));
2002 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
2003 match, "drop;");
2004 free(match);
2005
2006 /* ICMP echo reply. These flows reply to ICMP echo requests
2007 * received for the router's IP address. Since packets only
2008 * get here as part of the logical router datapath, the inport
2009 * (i.e. the incoming locally attached net) does not matter.
2010 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
2011 match = xasprintf(
2012 "ip4.dst == "IP_FMT" && icmp4.type == 8 && icmp4.code == 0",
2013 IP_ARGS(op->ip));
2014 char *actions = xasprintf(
2015 "ip4.dst = ip4.src; "
2016 "ip4.src = "IP_FMT"; "
2017 "ip.ttl = 255; "
2018 "icmp4.type = 0; "
2019 "inport = \"\"; /* Allow sending out inport. */ "
2020 "next; ",
2021 IP_ARGS(op->ip));
2022 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2023 match, actions);
2024 free(match);
2025 free(actions);
2026
2027 /* ARP reply. These flows reply to ARP requests for the router's own
2028 * IP address. */
2029 match = xasprintf(
2030 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2031 op->json_key, IP_ARGS(op->ip));
2032 actions = xasprintf(
2033 "eth.dst = eth.src; "
2034 "eth.src = "ETH_ADDR_FMT"; "
2035 "arp.op = 2; /* ARP reply */ "
2036 "arp.tha = arp.sha; "
2037 "arp.sha = "ETH_ADDR_FMT"; "
2038 "arp.tpa = arp.spa; "
2039 "arp.spa = "IP_FMT"; "
2040 "outport = %s; "
2041 "inport = \"\"; /* Allow sending out inport. */ "
2042 "output;",
2043 ETH_ADDR_ARGS(op->mac),
2044 ETH_ADDR_ARGS(op->mac),
2045 IP_ARGS(op->ip),
2046 op->json_key);
2047 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2048 match, actions);
2049 free(match);
2050 free(actions);
2051
2052 /* ARP handling for external IP addresses.
2053 *
2054 * DNAT IP addresses are external IP addresses that need ARP
2055 * handling. */
2056 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2057 const struct nbrec_nat *nat;
2058
2059 nat = op->od->nbr->nat[i];
2060
2061 if(!strcmp(nat->type, "snat")) {
2062 continue;
2063 }
2064
2065 ovs_be32 ip;
2066 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2067 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2068 VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration "
2069 "for router %s", nat->external_ip, op->key);
2070 continue;
2071 }
2072
2073 match = xasprintf(
2074 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2075 op->json_key, IP_ARGS(ip));
2076 actions = xasprintf(
2077 "eth.dst = eth.src; "
2078 "eth.src = "ETH_ADDR_FMT"; "
2079 "arp.op = 2; /* ARP reply */ "
2080 "arp.tha = arp.sha; "
2081 "arp.sha = "ETH_ADDR_FMT"; "
2082 "arp.tpa = arp.spa; "
2083 "arp.spa = "IP_FMT"; "
2084 "outport = %s; "
2085 "inport = \"\"; /* Allow sending out inport. */ "
2086 "output;",
2087 ETH_ADDR_ARGS(op->mac),
2088 ETH_ADDR_ARGS(op->mac),
2089 IP_ARGS(ip),
2090 op->json_key);
2091 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2092 match, actions);
2093 free(match);
2094 free(actions);
2095 }
2096
2097 /* Drop IP traffic to this router, unless the router ip is used as
2098 * SNAT ip. */
2099 bool snat_ip_is_router_ip = false;
2100 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2101 const struct nbrec_nat *nat;
2102 ovs_be32 ip;
2103
2104 nat = op->od->nbr->nat[i];
2105 if (strcmp(nat->type, "snat")) {
2106 continue;
2107 }
2108
2109 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2110 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2111 VLOG_WARN_RL(&rl, "bad ip address %s in snat configuration "
2112 "for router %s", nat->external_ip, op->key);
2113 continue;
2114 }
2115
2116 if (ip == op->ip) {
2117 snat_ip_is_router_ip = true;
2118 break;
2119 }
2120 }
2121
2122 if (!snat_ip_is_router_ip) {
2123 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
2124 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, match,
2125 "drop;");
2126 free(match);
2127 }
2128 }
2129
2130 /* NAT in Gateway routers. */
2131 HMAP_FOR_EACH (od, key_node, datapaths) {
2132 if (!od->nbr) {
2133 continue;
2134 }
2135
2136 /* Packets are allowed by default. */
2137 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2138 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2139 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2140
2141 /* NAT rules are only valid on Gateway routers. */
2142 if (!smap_get(&od->nbr->options, "chassis")) {
2143 continue;
2144 }
2145
2146 for (int i = 0; i < od->nbr->n_nat; i++) {
2147 const struct nbrec_nat *nat;
2148
2149 nat = od->nbr->nat[i];
2150
2151 ovs_be32 ip, mask;
2152
2153 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2154 if (error || mask != OVS_BE32_MAX) {
2155 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2156 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2157 nat->external_ip);
2158 free(error);
2159 continue;
2160 }
2161
2162 /* Check the validity of nat->logical_ip. 'logical_ip' can
2163 * be a subnet when the type is "snat". */
2164 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2165 if (!strcmp(nat->type, "snat")) {
2166 if (error) {
2167 static struct vlog_rate_limit rl =
2168 VLOG_RATE_LIMIT_INIT(5, 1);
2169 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2170 "in router "UUID_FMT"",
2171 nat->logical_ip, UUID_ARGS(&od->key));
2172 free(error);
2173 continue;
2174 }
2175 } else {
2176 if (error || mask != OVS_BE32_MAX) {
2177 static struct vlog_rate_limit rl =
2178 VLOG_RATE_LIMIT_INIT(5, 1);
2179 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2180 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2181 free(error);
2182 continue;
2183 }
2184 }
2185
2186
2187 char *match, *actions;
2188
2189 /* Ingress UNSNAT table: It is for already established connections'
2190 * reverse traffic. i.e., SNAT has already been done in egress
2191 * pipeline and now the packet has entered the ingress pipeline as
2192 * part of a reply. We undo the SNAT here.
2193 *
2194 * Undoing SNAT has to happen before DNAT processing. This is
2195 * because when the packet was DNATed in ingress pipeline, it did
2196 * not know about the possibility of eventual additional SNAT in
2197 * egress pipeline. */
2198 if (!strcmp(nat->type, "snat")
2199 || !strcmp(nat->type, "dnat_and_snat")) {
2200 match = xasprintf("ip && ip4.dst == %s", nat->external_ip);
2201 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
2202 match, "ct_snat; next;");
2203 free(match);
2204 }
2205
2206 /* Ingress DNAT table: Packets enter the pipeline with destination
2207 * IP address that needs to be DNATted from a external IP address
2208 * to a logical IP address. */
2209 if (!strcmp(nat->type, "dnat")
2210 || !strcmp(nat->type, "dnat_and_snat")) {
2211 /* Packet when it goes from the initiator to destination.
2212 * We need to zero the inport because the router can
2213 * send the packet back through the same interface. */
2214 match = xasprintf("ip && ip4.dst == %s", nat->external_ip);
2215 actions = xasprintf("inport = \"\"; ct_dnat(%s);",
2216 nat->logical_ip);
2217 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
2218 match, actions);
2219 free(match);
2220 free(actions);
2221 }
2222
2223 /* Egress SNAT table: Packets enter the egress pipeline with
2224 * source ip address that needs to be SNATted to a external ip
2225 * address. */
2226 if (!strcmp(nat->type, "snat")
2227 || !strcmp(nat->type, "dnat_and_snat")) {
2228 match = xasprintf("ip && ip4.src == %s", nat->logical_ip);
2229 actions = xasprintf("ct_snat(%s);", nat->external_ip);
2230
2231 /* The priority here is calculated such that the
2232 * nat->logical_ip with the longest mask gets a higher
2233 * priority. */
2234 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
2235 count_1bits(ntohl(mask)) + 1, match, actions);
2236 free(match);
2237 free(actions);
2238 }
2239 }
2240
2241 /* Re-circulate every packet through the DNAT zone.
2242 * This helps with two things.
2243 *
2244 * 1. Any packet that needs to be unDNATed in the reverse
2245 * direction gets unDNATed. Ideally this could be done in
2246 * the egress pipeline. But since the gateway router
2247 * does not have any feature that depends on the source
2248 * ip address being external IP address for IP routing,
2249 * we can do it here, saving a future re-circulation.
2250 *
2251 * 2. Any packet that was sent through SNAT zone in the
2252 * previous table automatically gets re-circulated to get
2253 * back the new destination IP address that is needed for
2254 * routing in the openflow pipeline. */
2255 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2256 "ip", "inport = \"\"; ct_dnat;");
2257 }
2258
2259 /* Logical router ingress table 2: IP Routing.
2260 *
2261 * A packet that arrives at this table is an IP packet that should be
2262 * routed to the address in ip4.dst. This table sets outport to the correct
2263 * output port, eth.src to the output port's MAC address, and reg0 to the
2264 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
2265 * unchanged), and advances to the next table for ARP resolution. */
2266 HMAP_FOR_EACH (op, key_node, ports) {
2267 if (!op->nbr) {
2268 continue;
2269 }
2270
2271 add_route(lflows, op, op->network, op->mask, 0);
2272 }
2273 HMAP_FOR_EACH (od, key_node, datapaths) {
2274 if (!od->nbr) {
2275 continue;
2276 }
2277
2278 /* Convert the static routes to flows. */
2279 for (int i = 0; i < od->nbr->n_static_routes; i++) {
2280 const struct nbrec_logical_router_static_route *route;
2281
2282 route = od->nbr->static_routes[i];
2283 build_static_route_flow(lflows, od, ports, route);
2284 }
2285
2286 if (od->gateway && od->gateway_port) {
2287 add_route(lflows, od->gateway_port, 0, 0, od->gateway);
2288 }
2289 }
2290 /* XXX destination unreachable */
2291
2292 /* Local router ingress table 3: ARP Resolution.
2293 *
2294 * Any packet that reaches this table is an IP packet whose next-hop IP
2295 * address is in reg0. (ip4.dst is the final destination.) This table
2296 * resolves the IP address in reg0 into an output port in outport and an
2297 * Ethernet address in eth.dst. */
2298 HMAP_FOR_EACH (op, key_node, ports) {
2299 if (op->nbr) {
2300 /* This is a logical router port. If next-hop IP address in 'reg0'
2301 * matches ip address of this router port, then the packet is
2302 * intended to eventually be sent to this logical port. Set the
2303 * destination mac address using this port's mac address.
2304 *
2305 * The packet is still in peer's logical pipeline. So the match
2306 * should be on peer's outport. */
2307 if (op->nbr->peer) {
2308 struct ovn_port *peer = ovn_port_find(ports, op->nbr->peer);
2309 if (!peer) {
2310 continue;
2311 }
2312
2313 if (!peer->ip || !op->ip) {
2314 continue;
2315 }
2316 char *match = xasprintf("outport == %s && reg0 == "IP_FMT,
2317 peer->json_key, IP_ARGS(op->ip));
2318 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
2319 "next;", ETH_ADDR_ARGS(op->mac));
2320 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2321 100, match, actions);
2322 free(actions);
2323 free(match);
2324 }
2325 } else if (op->od->n_router_ports && strcmp(op->nbs->type, "router")) {
2326 /* This is a logical switch port that backs a VM or a container.
2327 * Extract its addresses. For each of the address, go through all
2328 * the router ports attached to the switch (to which this port
2329 * connects) and if the address in question is reachable from the
2330 * router port, add an ARP entry in that router's pipeline. */
2331
2332 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
2333 struct lport_addresses laddrs;
2334 if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
2335 false)) {
2336 continue;
2337 }
2338
2339 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
2340 ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
2341 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2342 /* Get the Logical_Router_Port that the
2343 * Logical_Switch_Port is connected to, as
2344 * 'peer'. */
2345 const char *peer_name = smap_get(
2346 &op->od->router_ports[j]->nbs->options,
2347 "router-port");
2348 if (!peer_name) {
2349 continue;
2350 }
2351
2352 struct ovn_port *peer
2353 = ovn_port_find(ports, peer_name);
2354 if (!peer || !peer->nbr) {
2355 continue;
2356 }
2357
2358 /* Make sure that 'ip' is in 'peer''s network. */
2359 if ((ip ^ peer->network) & peer->mask) {
2360 continue;
2361 }
2362
2363 char *match = xasprintf(
2364 "outport == %s && reg0 == "IP_FMT,
2365 peer->json_key, IP_ARGS(ip));
2366 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
2367 "next;",
2368 ETH_ADDR_ARGS(laddrs.ea));
2369 ovn_lflow_add(lflows, peer->od,
2370 S_ROUTER_IN_ARP_RESOLVE,
2371 100, match, actions);
2372 free(actions);
2373 free(match);
2374 break;
2375 }
2376 }
2377
2378 free(laddrs.ipv4_addrs);
2379 }
2380 } else if (!strcmp(op->nbs->type, "router")) {
2381 /* This is a logical switch port that connects to a router. */
2382
2383 /* The peer of this switch port is the router port for which
2384 * we need to add logical flows such that it can resolve
2385 * ARP entries for all the other router ports connected to
2386 * the switch in question. */
2387
2388 const char *peer_name = smap_get(&op->nbs->options,
2389 "router-port");
2390 if (!peer_name) {
2391 continue;
2392 }
2393
2394 struct ovn_port *peer = ovn_port_find(ports, peer_name);
2395 if (!peer || !peer->nbr || !peer->ip) {
2396 continue;
2397 }
2398
2399 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2400 const char *router_port_name = smap_get(
2401 &op->od->router_ports[j]->nbs->options,
2402 "router-port");
2403 struct ovn_port *router_port = ovn_port_find(ports,
2404 router_port_name);
2405 if (!router_port || !router_port->nbr || !router_port->ip) {
2406 continue;
2407 }
2408
2409 /* Skip the router port under consideration. */
2410 if (router_port == peer) {
2411 continue;
2412 }
2413
2414 if (!router_port->ip) {
2415 continue;
2416 }
2417 char *match = xasprintf("outport == %s && reg0 == "IP_FMT,
2418 peer->json_key,
2419 IP_ARGS(router_port->ip));
2420 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; next;",
2421 ETH_ADDR_ARGS(router_port->mac));
2422 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2423 100, match, actions);
2424 free(actions);
2425 free(match);
2426 }
2427 }
2428 }
2429
2430 HMAP_FOR_EACH (od, key_node, datapaths) {
2431 if (!od->nbr) {
2432 continue;
2433 }
2434
2435 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
2436 "get_arp(outport, reg0); next;");
2437 }
2438
2439 /* Local router ingress table 4: ARP request.
2440 *
2441 * In the common case where the Ethernet destination has been resolved,
2442 * this table outputs the packet (priority 100). Otherwise, it composes
2443 * and sends an ARP request (priority 0). */
2444 HMAP_FOR_EACH (od, key_node, datapaths) {
2445 if (!od->nbr) {
2446 continue;
2447 }
2448
2449 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
2450 "eth.dst == 00:00:00:00:00:00",
2451 "arp { "
2452 "eth.dst = ff:ff:ff:ff:ff:ff; "
2453 "arp.spa = reg1; "
2454 "arp.op = 1; " /* ARP request */
2455 "output; "
2456 "};");
2457 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
2458 }
2459
2460 /* Logical router egress table 1: Delivery (priority 100).
2461 *
2462 * Priority 100 rules deliver packets to enabled logical ports. */
2463 HMAP_FOR_EACH (op, key_node, ports) {
2464 if (!op->nbr) {
2465 continue;
2466 }
2467
2468 if (!lrport_is_enabled(op->nbr)) {
2469 /* Drop packets to disabled logical ports (since logical flow
2470 * tables are default-drop). */
2471 continue;
2472 }
2473
2474 char *match = xasprintf("outport == %s", op->json_key);
2475 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
2476 match, "output;");
2477 free(match);
2478 }
2479 }
2480
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database.
 *
 * Works "mark and sweep" style: the desired flows and multicast groups are
 * first built into in-memory hmaps, then reconciled against the existing
 * southbound rows.  A database row that matches a desired entry consumes it;
 * rows with no match are deleted; desired entries still left over afterwards
 * are inserted as new rows. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            /* The flow's datapath no longer exists northbound: prune it. */
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            /* The row already matches a desired flow; retire the desired
             * entry so only genuinely new flows remain in 'lflows' for the
             * insertion loop below. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }
    /* Whatever survives in 'lflows' has no southbound row yet: insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Record the human-readable stage name in external_ids, purely as a
         * debugging aid. */
        const struct smap ids = SMAP_CONST1(&ids, "stage-name",
                                            ovn_stage_to_str(lflow->stage));
        sbrec_logical_flow_set_external_ids(sbflow, &ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database, with the same
     * match/delete/insert reconciliation as for logical flows above. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Group exists: refresh its port set in place, then retire the
             * desired entry so it is not re-inserted below. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
2568 \f
2569 static void
2570 ovnnb_db_run(struct northd_context *ctx)
2571 {
2572 if (!ctx->ovnsb_txn) {
2573 return;
2574 }
2575 struct hmap datapaths, ports;
2576 build_datapaths(ctx, &datapaths);
2577 build_ports(ctx, &datapaths, &ports);
2578 build_lflows(ctx, &datapaths, &ports);
2579
2580 struct ovn_datapath *dp, *next_dp;
2581 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
2582 ovn_datapath_destroy(&datapaths, dp);
2583 }
2584 hmap_destroy(&datapaths);
2585
2586 struct ovn_port *port, *next_port;
2587 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
2588 ovn_port_destroy(&ports, port);
2589 }
2590 hmap_destroy(&ports);
2591 }
2592
2593 /*
2594 * The only change we get notified about is if the 'chassis' column of the
2595 * 'Port_Binding' table changes. When this column is not empty, it means we
2596 * need to set the corresponding logical port as 'up' in the northbound DB.
2597 */
2598 static void
2599 ovnsb_db_run(struct northd_context *ctx)
2600 {
2601 if (!ctx->ovnnb_txn) {
2602 return;
2603 }
2604 struct hmap lports_hmap;
2605 const struct sbrec_port_binding *sb;
2606 const struct nbrec_logical_switch_port *nb;
2607
2608 struct lport_hash_node {
2609 struct hmap_node node;
2610 const struct nbrec_logical_switch_port *nb;
2611 } *hash_node;
2612
2613 hmap_init(&lports_hmap);
2614
2615 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
2616 hash_node = xzalloc(sizeof *hash_node);
2617 hash_node->nb = nb;
2618 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
2619 }
2620
2621 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
2622 nb = NULL;
2623 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
2624 hash_string(sb->logical_port, 0),
2625 &lports_hmap) {
2626 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
2627 nb = hash_node->nb;
2628 break;
2629 }
2630 }
2631
2632 if (!nb) {
2633 /* The logical port doesn't exist for this port binding. This can
2634 * happen under normal circumstances when ovn-northd hasn't gotten
2635 * around to pruning the Port_Binding yet. */
2636 continue;
2637 }
2638
2639 if (sb->chassis && (!nb->up || !*nb->up)) {
2640 bool up = true;
2641 nbrec_logical_switch_port_set_up(nb, &up, 1);
2642 } else if (!sb->chassis && (!nb->up || *nb->up)) {
2643 bool up = false;
2644 nbrec_logical_switch_port_set_up(nb, &up, 1);
2645 }
2646 }
2647
2648 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
2649 free(hash_node);
2650 }
2651 hmap_destroy(&lports_hmap);
2652 }
2653 \f
2654
static char *default_nb_db_;

/* Returns the default northbound DB target: a unix socket under the OVS run
 * directory.  Built lazily on first call and cached; main() frees the cached
 * string at exit. */
static const char *
default_nb_db(void)
{
    if (default_nb_db_ == NULL) {
        default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
    }
    return default_nb_db_;
}
2665
static char *default_sb_db_;

/* Returns the default southbound DB target: a unix socket under the OVS run
 * directory.  Built lazily on first call and cached; main() frees the cached
 * string at exit. */
static const char *
default_sb_db(void)
{
    if (default_sb_db_ == NULL) {
        default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
    }
    return default_sb_db_;
}
2676
/* Parses ovn-northd's command-line options into the file-scope 'ovnnb_db'
 * and 'ovnsb_db' connection strings, falling back to the default unix
 * sockets when they are not given.  The daemon, vlog, and SSL option groups
 * are handled by their respective macros; --help, --options, and --version
 * exit the process directly. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        /* These macros supply enum values for long-only options used by the
         * DAEMON_OPTION_HANDLERS / VLOG_OPTION_HANDLERS cases below. */
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Each of these macros expands to 'case' labels that consume the
         * corresponding group of standard OVS options. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            /* getopt_long() already printed a diagnostic for unknown
             * options; keep scanning the remaining arguments. */
            break;
        }
    }

    /* Fall back to the default DB socket paths for anything not given on
     * the command line. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
2745
/* Registers 'col' with 'idl' for replication but suppresses change alerts
 * for it: ovn-northd writes these columns itself and has no need to wake up
 * when they change. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *col)
{
    ovsdb_idl_add_column(idl, col);
    ovsdb_idl_omit_alert(idl, col);
}
2753
/* ovn-northd entry point: daemonizes, connects to the northbound and
 * southbound OVSDB databases, and runs the translation loop until told to
 * exit via the "exit" unixctl command or service stop. */
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    nbrec_init();
    sbrec_init();

    /* We want to detect all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));

    /* For the southbound DB, replicate nothing by default ('false') and
     * register below only the tables and columns ovn-northd uses. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    /* 'chassis' is the one southbound column whose changes we do want to
     * be alerted about: ovnsb_db_run() mirrors it into the northbound
     * 'up' state. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx);
        ovnsb_db_run(&ctx);

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Wake the poll loop immediately so we fall out of the loop
             * after committing the final transactions. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    free(default_nb_db_);
    free(default_sb_db_);
    exit(res);
}
2860
2861 static void
2862 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2863 const char *argv[] OVS_UNUSED, void *exiting_)
2864 {
2865 bool *exiting = exiting_;
2866 *exiting = true;
2867
2868 unixctl_command_reply(conn, NULL);
2869 }