]> git.proxmox.com Git - ovs.git/blob - ovn/northd/ovn-northd.c
ovn-northd: fix monitor process naming
[ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/ovn-dhcp.h"
32 #include "ovn/lib/ovn-nb-idl.h"
33 #include "ovn/lib/ovn-sb-idl.h"
34 #include "ovn/lib/ovn-util.h"
35 #include "ovn/actions.h"
36 #include "packets.h"
37 #include "poll-loop.h"
38 #include "smap.h"
39 #include "sset.h"
40 #include "stream.h"
41 #include "stream-ssl.h"
42 #include "unixctl.h"
43 #include "util.h"
44 #include "uuid.h"
45 #include "openvswitch/vlog.h"
46
47 VLOG_DEFINE_THIS_MODULE(ovn_northd);
48
49 static unixctl_cb_func ovn_northd_exit;
50
/* Context for a single ovn-northd main-loop iteration: the IDL handles for
 * the northbound and southbound databases plus the transactions (possibly
 * NULL) currently open on each. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;     /* OVN_Northbound IDL session. */
    struct ovsdb_idl *ovnsb_idl;     /* OVN_Southbound IDL session. */
    struct ovsdb_idl_txn *ovnnb_txn; /* Open NB transaction, if any. */
    struct ovsdb_idl_txn *ovnsb_txn; /* Open SB transaction, if any. */
};
57
58 static const char *ovnnb_db;
59 static const char *ovnsb_db;
60
61 #define MAC_ADDR_PREFIX 0x0A0000000000ULL
62 #define MAC_ADDR_SPACE 0xffffff
63
64 /* MAC address management (macam) table of "struct eth_addr"s, that holds the
65 * MAC addresses allocated by the OVN ipam module. */
66 static struct hmap macam = HMAP_INITIALIZER(&macam);
67
68 #define MAX_OVN_TAGS 4096
69 \f
70 /* Pipeline stages. */
71
72 /* The two pipelines in an OVN logical flow table. */
73 enum ovn_pipeline {
74 P_IN, /* Ingress pipeline. */
75 P_OUT /* Egress pipeline. */
76 };
77
78 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
79 enum ovn_datapath_type {
80 DP_SWITCH, /* OVN logical switch. */
81 DP_ROUTER /* OVN logical router. */
82 };
83
84 /* Returns an "enum ovn_stage" built from the arguments.
85 *
86 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
87 * functions can't be used in enums or switch cases.) */
88 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
89 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
90
91 /* A stage within an OVN logical switch or router.
92 *
93 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
94 * or router, whether the stage is part of the ingress or egress pipeline, and
95 * the table within that pipeline. The first three components are combined to
96 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
97 * S_ROUTER_OUT_DELIVERY. */
98 enum ovn_stage {
99 #define PIPELINE_STAGES \
100 /* Logical switch ingress stages. */ \
101 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
102 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
103 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
104 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
105 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
106 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
107 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
108 PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
109 PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \
110 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \
111 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \
112 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \
113 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
114 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 13, "ls_in_l2_lkup") \
115 \
116 /* Logical switch egress stages. */ \
117 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
118 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
119 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
120 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
121 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
122 PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
123 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \
124 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \
125 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \
126 \
127 /* Logical router ingress stages. */ \
128 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
129 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
130 PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
131 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
132 PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
133 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 5, "lr_in_ip_routing") \
134 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 6, "lr_in_arp_resolve") \
135 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 7, "lr_in_arp_request") \
136 \
137 /* Logical router egress stages. */ \
138 PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
139 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
140
141 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
142 S_##DP_TYPE##_##PIPELINE##_##STAGE \
143 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
144 PIPELINE_STAGES
145 #undef PIPELINE_STAGE
146 };
147
148 /* Due to various hard-coded priorities need to implement ACLs, the
149 * northbound database supports a smaller range of ACL priorities than
150 * are available to logical flows. This value is added to an ACL
151 * priority to determine the ACL's logical flow priority. */
152 #define OVN_ACL_PRI_OFFSET 1000
153
154 #define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
155 #define REGBIT_CONNTRACK_COMMIT "reg0[1]"
156 #define REGBIT_CONNTRACK_NAT "reg0[2]"
157 #define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
158
159 /* Returns an "enum ovn_stage" built from the arguments. */
160 static enum ovn_stage
161 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
162 uint8_t table)
163 {
164 return OVN_STAGE_BUILD(dp_type, pipeline, table);
165 }
166
167 /* Returns the pipeline to which 'stage' belongs. */
168 static enum ovn_pipeline
169 ovn_stage_get_pipeline(enum ovn_stage stage)
170 {
171 return (stage >> 8) & 1;
172 }
173
174 /* Returns the table to which 'stage' belongs. */
175 static uint8_t
176 ovn_stage_get_table(enum ovn_stage stage)
177 {
178 return stage & 0xff;
179 }
180
/* Returns a string name for 'stage' (e.g. "ls_in_port_sec_l2").
 *
 * The switch cases are generated from the PIPELINE_STAGES x-macro so the
 * mapping stays in sync with the enum definition; an unrecognized value
 * yields the literal "<unknown>". */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
193
/* Returns the type of the datapath (DP_SWITCH or DP_ROUTER) to which a flow
 * with the given 'stage' may be added.
 *
 * Cases are generated from the PIPELINE_STAGES x-macro; any value not
 * produced by that macro is a programming error, hence OVS_NOT_REACHED(). */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
    default: OVS_NOT_REACHED();
    }
}
207 \f
/* Prints a usage message for ovn-northd to stdout, followed by the shared
 * daemon, logging, and stream-connection option help. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    /* "database" streams: active and passive supported, no bootstrap. */
    stream_usage("database", true, true, false);
}
228 \f
/* One allocated tunnel ID, stored in an hmap keyed on hash_int(tnlid, 0). */
struct tnlid_node {
    struct hmap_node hmap_node; /* In a tunnel-ID set. */
    uint32_t tnlid;             /* The allocated tunnel ID. */
};
233
234 static void
235 destroy_tnlids(struct hmap *tnlids)
236 {
237 struct tnlid_node *node;
238 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
239 free(node);
240 }
241 hmap_destroy(tnlids);
242 }
243
244 static void
245 add_tnlid(struct hmap *set, uint32_t tnlid)
246 {
247 struct tnlid_node *node = xmalloc(sizeof *node);
248 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
249 node->tnlid = tnlid;
250 }
251
252 static bool
253 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
254 {
255 const struct tnlid_node *node;
256 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
257 if (node->tnlid == tnlid) {
258 return true;
259 }
260 }
261 return false;
262 }
263
264 static uint32_t
265 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
266 uint32_t *hint)
267 {
268 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
269 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
270 if (!tnlid_in_use(set, tnlid)) {
271 add_tnlid(set, tnlid);
272 *hint = tnlid;
273 return tnlid;
274 }
275 }
276
277 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
278 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
279 return 0;
280 }
281 \f
/* One qdisc queue ID reserved on a particular chassis, stored in an hmap
 * keyed on uuid_hash(&chassis_uuid). */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* In a chassis-queue set. */
    uint32_t queue_id;          /* Reserved queue ID. */
    struct uuid chassis_uuid;   /* Chassis that owns the queue. */
};
287
288 static void
289 destroy_chassis_queues(struct hmap *set)
290 {
291 struct ovn_chassis_qdisc_queues *node;
292 HMAP_FOR_EACH_POP (node, key_node, set) {
293 free(node);
294 }
295 hmap_destroy(set);
296 }
297
298 static void
299 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
300 uint32_t queue_id)
301 {
302 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
303 node->queue_id = queue_id;
304 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
305 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
306 }
307
308 static bool
309 chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
310 uint32_t queue_id)
311 {
312 const struct ovn_chassis_qdisc_queues *node;
313 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
314 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
315 && node->queue_id == queue_id) {
316 return true;
317 }
318 }
319 return false;
320 }
321
/* Reserves and returns the lowest queue ID in
 * (QDISC_MIN_QUEUE_ID, QDISC_MAX_QUEUE_ID] that is not yet used on
 * 'chassis', recording the reservation in 'set'.  Returns 0, with a
 * rate-limited warning naming the chassis, if every ID is taken. */
static uint32_t
allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
{
    for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
         queue_id <= QDISC_MAX_QUEUE_ID;
         queue_id++) {
        if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
            add_chassis_queue(set, &chassis->header_.uuid, queue_id);
            return queue_id;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
    return 0;
}
338
339 static void
340 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
341 uint32_t queue_id)
342 {
343 struct ovn_chassis_qdisc_queues *node;
344 HMAP_FOR_EACH_WITH_HASH (node, key_node,
345 uuid_hash(&chassis->header_.uuid),
346 set) {
347 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
348 && node->queue_id == queue_id) {
349 hmap_remove(set, &node->key_node);
350 break;
351 }
352 }
353 }
354
/* Returns true if 'opts' carries either of the QoS options
 * ("qos_max_rate" or "qos_burst") that require a qdisc queue. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    return smap_get(opts, "qos_max_rate") != NULL
           || smap_get(opts, "qos_burst") != NULL;
}
361 \f
/* A logical datapath: the join of a northbound logical switch or router with
 * its southbound Datapath_Binding record.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    /* Exactly one of 'nbs' and 'nbr' is nonnull for a joined datapath;
     * 'sb' is NULL until a southbound record is created or matched. */
    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;  /* Ports peered with router ports. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Port tunnel keys in use (tnlid_node). */
    uint32_t port_key_hint;     /* Hint for allocate_tnlid(). */

    bool has_unknown;           /* Any port with an "unknown" address? */

    /* IPAM data: IPv4 addresses allocated on this switch (ipam_node). */
    struct hmap ipam;
};
386
/* One MAC address allocated by OVN IPAM, stored in the global 'macam' hmap
 * keyed on hash_uint64() of the 64-bit MAC value. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr; /* Allocated MAC address. */
};
391
392 static void
393 cleanup_macam(struct hmap *macam)
394 {
395 struct macam_node *node;
396 HMAP_FOR_EACH_POP (node, hmap_node, macam) {
397 free(node);
398 }
399 }
400
/* One IPv4 address allocated by OVN IPAM on a datapath, stored in
 * ovn_datapath's 'ipam' hmap keyed on hash_int(ip_addr, 0). */
struct ipam_node {
    struct hmap_node hmap_node;
    uint32_t ip_addr; /* Allocated IP address (host byte order). */
};
405
406 static void
407 destroy_ipam(struct hmap *ipam)
408 {
409 struct ipam_node *node;
410 HMAP_FOR_EACH_POP (node, hmap_node, ipam) {
411 free(node);
412 }
413 hmap_destroy(ipam);
414 }
415
416 static struct ovn_datapath *
417 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
418 const struct nbrec_logical_switch *nbs,
419 const struct nbrec_logical_router *nbr,
420 const struct sbrec_datapath_binding *sb)
421 {
422 struct ovn_datapath *od = xzalloc(sizeof *od);
423 od->key = *key;
424 od->sb = sb;
425 od->nbs = nbs;
426 od->nbr = nbr;
427 hmap_init(&od->port_tnlids);
428 hmap_init(&od->ipam);
429 od->port_key_hint = 0;
430 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
431 return od;
432 }
433
434 static void
435 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
436 {
437 if (od) {
438 /* Don't remove od->list. It is used within build_datapaths() as a
439 * private list and once we've exited that function it is not safe to
440 * use it. */
441 hmap_remove(datapaths, &od->key_node);
442 destroy_tnlids(&od->port_tnlids);
443 destroy_ipam(&od->ipam);
444 free(od->router_ports);
445 free(od);
446 }
447 }
448
449 /* Returns 'od''s datapath type. */
450 static enum ovn_datapath_type
451 ovn_datapath_get_type(const struct ovn_datapath *od)
452 {
453 return od->nbs ? DP_SWITCH : DP_ROUTER;
454 }
455
456 static struct ovn_datapath *
457 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
458 {
459 struct ovn_datapath *od;
460
461 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
462 if (uuid_equals(uuid, &od->key)) {
463 return od;
464 }
465 }
466 return NULL;
467 }
468
469 static struct ovn_datapath *
470 ovn_datapath_from_sbrec(struct hmap *datapaths,
471 const struct sbrec_datapath_binding *sb)
472 {
473 struct uuid key;
474
475 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
476 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
477 return NULL;
478 }
479 return ovn_datapath_find(datapaths, &key);
480 }
481
482 static bool
483 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
484 {
485 return !lrouter->enabled || *lrouter->enabled;
486 }
487
/* Joins southbound Datapath_Binding rows with northbound logical switches
 * and routers into 'datapaths', partitioning the results into three lists:
 *
 *   - 'sb_only': SB rows with no matching NB record (to be deleted later).
 *   - 'nb_only': NB records with no SB row yet (to be created later).
 *   - 'both':    records present on both sides.
 *
 * SB rows that lack both external-ids keys, or that duplicate another row's
 * logical-switch/router UUID, are deleted immediately within the current
 * SB transaction. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every valid SB row; everything starts out in 'sb_only'
     * and is moved to 'both' as NB matches are found below.  The _SAFE
     * iterator is required because invalid rows are deleted mid-loop. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            /* No way to tell which NB record this belongs to: drop it. */
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            /* Two SB rows claim the same NB record; keep the first. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: match NB logical switches against the SB index. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }

    /* Pass 3: match NB logical routers, skipping disabled ones. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and a router would have to share
                 * the same OVSDB row UUID. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
571
/* Allocates a fresh datapath tunnel key from 'dp_tnlids'.  Keys are 24-bit
 * (Geneve VNI sized); a static hint keeps allocations roughly sequential
 * across calls.  Returns 0 on exhaustion. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;
    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
578
579 /* Updates the southbound Datapath_Binding table so that it contains the
580 * logical switches and routers specified by the northbound database.
581 *
582 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
583 * switch and router. */
584 static void
585 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
586 {
587 struct ovs_list sb_only, nb_only, both;
588
589 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
590
591 if (!ovs_list_is_empty(&nb_only)) {
592 /* First index the in-use datapath tunnel IDs. */
593 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
594 struct ovn_datapath *od;
595 LIST_FOR_EACH (od, list, &both) {
596 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
597 }
598
599 /* Add southbound record for each unmatched northbound record. */
600 LIST_FOR_EACH (od, list, &nb_only) {
601 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
602 if (!tunnel_key) {
603 break;
604 }
605
606 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
607
608 /* Get the logical-switch or logical-router UUID to set in
609 * external-ids. */
610 char uuid_s[UUID_LEN + 1];
611 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
612 const char *key = od->nbs ? "logical-switch" : "logical-router";
613
614 /* Get name to set in external-ids. */
615 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
616
617 /* Set external-ids. */
618 struct smap ids = SMAP_INITIALIZER(&ids);
619 smap_add(&ids, key, uuid_s);
620 if (*name) {
621 smap_add(&ids, "name", name);
622 }
623 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
624 smap_destroy(&ids);
625
626 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
627 }
628 destroy_tnlids(&dp_tnlids);
629 }
630
631 /* Delete southbound records without northbound matches. */
632 struct ovn_datapath *od, *next;
633 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
634 ovs_list_remove(&od->list);
635 sbrec_datapath_binding_delete(od->sb);
636 ovn_datapath_destroy(datapaths, od);
637 }
638 }
639 \f
/* A logical port: the join of a northbound logical switch port or logical
 * router port with its southbound Port_Binding record.  Exactly one of
 * 'nbsp' and 'nbrp' is nonnull for a joined port. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks; /* Router port MAC + networks. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
673
674 static struct ovn_port *
675 ovn_port_create(struct hmap *ports, const char *key,
676 const struct nbrec_logical_switch_port *nbsp,
677 const struct nbrec_logical_router_port *nbrp,
678 const struct sbrec_port_binding *sb)
679 {
680 struct ovn_port *op = xzalloc(sizeof *op);
681
682 struct ds json_key = DS_EMPTY_INITIALIZER;
683 json_string_escape(key, &json_key);
684 op->json_key = ds_steal_cstr(&json_key);
685
686 op->key = xstrdup(key);
687 op->sb = sb;
688 op->nbsp = nbsp;
689 op->nbrp = nbrp;
690 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
691 return op;
692 }
693
694 static void
695 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
696 {
697 if (port) {
698 /* Don't remove port->list. It is used within build_ports() as a
699 * private list and once we've exited that function it is not safe to
700 * use it. */
701 hmap_remove(ports, &port->key_node);
702
703 for (int i = 0; i < port->n_lsp_addrs; i++) {
704 destroy_lport_addresses(&port->lsp_addrs[i]);
705 }
706 free(port->lsp_addrs);
707
708 for (int i = 0; i < port->n_ps_addrs; i++) {
709 destroy_lport_addresses(&port->ps_addrs[i]);
710 }
711 free(port->ps_addrs);
712
713 destroy_lport_addresses(&port->lrp_networks);
714 free(port->json_key);
715 free(port->key);
716 free(port);
717 }
718 }
719
720 static struct ovn_port *
721 ovn_port_find(struct hmap *ports, const char *name)
722 {
723 struct ovn_port *op;
724
725 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
726 if (!strcmp(op->key, name)) {
727 return op;
728 }
729 }
730 return NULL;
731 }
732
733 static uint32_t
734 ovn_port_allocate_key(struct ovn_datapath *od)
735 {
736 return allocate_tnlid(&od->port_tnlids, "port",
737 (1u << 15) - 1, &od->port_key_hint);
738 }
739
740 static bool
741 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
742 {
743 struct macam_node *macam_node;
744 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
745 &macam) {
746 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
747 if (warn) {
748 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
749 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
750 ETH_ADDR_ARGS(macam_node->mac_addr));
751 }
752 return true;
753 }
754 }
755 return false;
756 }
757
758 static bool
759 ipam_is_duplicate_ip(struct ovn_datapath *od, uint32_t ip, bool warn)
760 {
761 struct ipam_node *ipam_node;
762 HMAP_FOR_EACH_WITH_HASH (ipam_node, hmap_node, hash_int(ip, 0),
763 &od->ipam) {
764 if (ipam_node->ip_addr == ip) {
765 if (warn) {
766 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
767 VLOG_WARN_RL(&rl, "Duplicate IP set: "IP_FMT,
768 IP_ARGS(htonl(ip)));
769 }
770 return true;
771 }
772 }
773 return false;
774 }
775
/* Records '*ea' in the global macam, but only if it belongs to the
 * OVN-managed MAC prefix (MAC_ADDR_PREFIX).  When 'check' is true,
 * duplicates are detected (with a warning) and skipped.  A NULL 'ea' is a
 * no-op. */
static void
ipam_insert_mac(struct eth_addr *ea, bool check)
{
    if (!ea) {
        return;
    }

    uint64_t mac64 = eth_addr_to_uint64(*ea);
    /* If the new MAC was not assigned by this address management system or
     * check is true and the new MAC is a duplicate, do not insert it into the
     * macam hmap. */
    if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
        || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
        return;
    }

    struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
    new_macam_node->mac_addr = *ea;
    hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
}
796
797 static void
798 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip, bool check)
799 {
800 if (!od) {
801 return;
802 }
803
804 if (check && ipam_is_duplicate_ip(od, ip, true)) {
805 return;
806 }
807
808 struct ipam_node *new_ipam_node = xmalloc(sizeof *new_ipam_node);
809 new_ipam_node->ip_addr = ip;
810 hmap_insert(&od->ipam, &new_ipam_node->hmap_node, hash_int(ip, 0));
811 }
812
/* Parses one logical-switch-port 'address' string and records its MAC in
 * the macam and, when the switch has other_config:subnet set, its IPv4
 * addresses in 'od''s IPAM map.  "unknown" and "dynamic" address specs are
 * skipped (dynamic addresses are handled separately by build_ipam()). */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || is_dynamic_lsp_address(address)) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!smap_get(&od->nbs->other_config, "subnet")) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip, true);
    }

    destroy_lport_addresses(&laddrs);
}
844
/* Seeds the MACAM/IPAM data structures with all addresses already assigned
 * to 'op'.
 *
 * For a switch port, every entry in the addresses column plus any
 * previously allocated dynamic_addresses is recorded.  For a router port,
 * the port's MAC is recorded, and its IPv4 addresses are recorded in the
 * IPAM map of the peer switch's datapath — but only when that peer switch
 * exists and has other_config:subnet set. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Router-port IPs live in the peer switch's IPAM space; skip when
         * there is no peered switch with a managed subnet. */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip, true);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
884
885 static uint64_t
886 ipam_get_unused_mac(void)
887 {
888 /* Stores the suffix of the most recently ipam-allocated MAC address. */
889 static uint32_t last_mac;
890
891 uint64_t mac64;
892 struct eth_addr mac;
893 uint32_t mac_addr_suffix, i;
894 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
895 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
896 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
897 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
898 eth_addr_from_uint64(mac64, &mac);
899 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
900 last_mac = mac_addr_suffix;
901 break;
902 }
903 }
904
905 if (i == MAC_ADDR_SPACE) {
906 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
907 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
908 mac64 = 0;
909 }
910
911 return mac64;
912 }
913
914 static uint32_t
915 ipam_get_unused_ip(struct ovn_datapath *od, uint32_t subnet, uint32_t mask)
916 {
917 if (!od) {
918 return 0;
919 }
920
921 uint32_t ip = 0;
922
923 /* Find an unused IP address in subnet. x.x.x.1 is reserved for a
924 * logical router port. */
925 for (uint32_t i = 2; i < ~mask; i++) {
926 uint32_t tentative_ip = subnet + i;
927 if (!ipam_is_duplicate_ip(od, tentative_ip, false)) {
928 ip = tentative_ip;
929 break;
930 }
931 }
932
933 if (!ip) {
934 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
935 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
936 }
937
938 return ip;
939 }
940
/* Allocates a dynamic MAC/IP pair for switch port 'op' on 'od' within
 * 'subnet'/'mask' and writes the result into the port's
 * dynamic_addresses column as "xx:xx:xx:xx:xx:xx a.b.c.d".
 *
 * 'addrspec' may pin the MAC ("<mac> dynamic"); otherwise a MAC is
 * allocated from the OVN prefix.  Returns false without side effects on
 * the NB database if either allocation fails. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec, ovs_be32 subnet, ovs_be32 mask)
{
    if (!od || !op || !op->nbsp) {
        return false;
    }

    uint32_t ip = ipam_get_unused_ip(od, ntohl(subnet), ntohl(mask));
    if (!ip) {
        return false;
    }

    struct eth_addr mac;
    bool check_mac;
    int n = 0;

    /* "<mac> dynamic": the user pinned the MAC and only the IP is
     * allocated.  'n' ensures the spec has no trailing garbage. */
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        check_mac = true;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        check_mac = false;
    }

    /* Add MAC/IP to MACAM/IPAM hmaps if both addresses were allocated
     * successfully. */
    ipam_insert_ip(od, ip, false);
    ipam_insert_mac(&mac, check_mac);

    char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT,
                               ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip)));
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr);
    free(new_addr);

    return true;
}
983
/* Walks every logical switch that has other_config:subnet set and allocates
 * dynamic MAC/IP addresses for switch ports whose addresses column contains
 * the "dynamic" keyword but that have no dynamic_addresses yet. */
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            const char *subnet_str = smap_get(&od->nbs->other_config,
                                              "subnet");
            if (!subnet_str) {
                continue;
            }

            /* Reject non-CIDR masks and /32 "subnets" (no host space). */
            ovs_be32 subnet, mask;
            char *error = ip_parse_masked(subnet_str, &subnet, &mask);
            if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
                static struct vlog_rate_limit rl
                    = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
                free(error);
                continue;
            }

            struct ovn_port *op;
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp =
                    od->nbs->ports[i];

                if (!nbsp) {
                    continue;
                }

                op = ovn_port_find(ports, nbsp->name);
                if (!op || (op->nbsp && op->peer)) {
                    /* Do not allocate addresses for logical switch ports that
                     * have a peer. */
                    continue;
                }

                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (is_dynamic_lsp_address(nbsp->addresses[j])
                        && !nbsp->dynamic_addresses) {
                        /* NOTE(review): on success this parses the freshly
                         * written dynamic_addresses into
                         * op->lsp_addrs[op->n_lsp_addrs] — assumes the
                         * lsp_addrs array was sized elsewhere with spare
                         * room for one dynamic entry; verify against the
                         * allocation in build_ports(). */
                        if (!ipam_allocate_addresses(od, op,
                                         nbsp->addresses[j], subnet, mask)
                            || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                            static struct vlog_rate_limit rl
                                = VLOG_RATE_LIMIT_INIT(1, 1);
                            VLOG_INFO_RL(&rl, "Failed to allocate address.");
                        } else {
                            op->n_lsp_addrs++;
                        }
                        /* At most one dynamic entry per port. */
                        break;
                    }
                }
            }
        }
    }
}
1050 \f
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags.  Nodes live in an hmap keyed on
 * hash_string(parent_name, 0). */
struct tag_alloc_node {
    struct hmap_node hmap_node;
    char *parent_name;              /* Owned copy of the parent port name. */
    unsigned long *allocated_tags;  /* A bitmap to track allocated tags. */
};
1060
1061 static void
1062 tag_alloc_destroy(struct hmap *tag_alloc_table)
1063 {
1064 struct tag_alloc_node *node;
1065 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1066 bitmap_free(node->allocated_tags);
1067 free(node->parent_name);
1068 free(node);
1069 }
1070 hmap_destroy(tag_alloc_table);
1071 }
1072
1073 static struct tag_alloc_node *
1074 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1075 {
1076 /* If a node for the 'parent_name' exists, return it. */
1077 struct tag_alloc_node *tag_alloc_node;
1078 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1079 hash_string(parent_name, 0),
1080 tag_alloc_table) {
1081 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1082 return tag_alloc_node;
1083 }
1084 }
1085
1086 /* Create a new node. */
1087 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1088 tag_alloc_node->parent_name = xstrdup(parent_name);
1089 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1090 /* Tag 0 is invalid for nested containers. */
1091 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1092 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1093 hash_string(parent_name, 0));
1094
1095 return tag_alloc_node;
1096 }
1097
1098 static void
1099 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1100 const struct nbrec_logical_switch_port *nbsp)
1101 {
1102 /* Add the tags of already existing nested containers. If there is no
1103 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1104 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1105 return;
1106 }
1107
1108 struct tag_alloc_node *tag_alloc_node;
1109 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1110 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1111 }
1112
1113 static void
1114 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1115 const struct nbrec_logical_switch_port *nbsp)
1116 {
1117 if (!nbsp->tag_request) {
1118 return;
1119 }
1120
1121 if (nbsp->parent_name && nbsp->parent_name[0]
1122 && *nbsp->tag_request == 0) {
1123 /* For nested containers that need allocation, do the allocation. */
1124
1125 if (nbsp->tag) {
1126 /* This has already been allocated. */
1127 return;
1128 }
1129
1130 struct tag_alloc_node *tag_alloc_node;
1131 int64_t tag;
1132 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1133 nbsp->parent_name);
1134 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1135 if (tag == MAX_OVN_TAGS) {
1136 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1137 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1138 "parent %s", nbsp->parent_name);
1139 return;
1140 }
1141 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1142 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1143 } else if (*nbsp->tag_request != 0) {
1144 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1145 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1146 }
1147 }
1148 \f
1149
/* Builds a "struct ovn_port" for every southbound Port_Binding and every
 * northbound logical switch/router port, pairing rows that share a name.
 * On return:
 *
 *   - 'sb_only' holds ports present only in the southbound DB,
 *   - 'nb_only' holds ports present only in the northbound DB,
 *   - 'both' holds ports present in both.
 *
 * Side effects: initializes 'ports' (index by name), records in-use qdisc
 * queue IDs in 'chassis_qdisc_queues', records already-assigned container
 * tags in 'tag_alloc_table', parses per-port address columns, and links
 * router/switch peer ports together. */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start with every SB Port_Binding provisionally in 'sb_only'; rows
     * matched against NB data below are moved to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: join each of its logical switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Remember any queue ID already assigned to this port's
                     * chassis so it is not reallocated. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse the "addresses" column into op->lsp_addrs; entries
                 * that are "unknown", dynamic-without-an-allocation, or
                 * malformed are skipped. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                                &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the "port_security" column into op->ps_addrs,
                 * skipping malformed entries. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: join each of its logical router ports.
             * Ports with a bad 'mac' or no usable networks are skipped. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
            /* Switch port of type "router": peer named by
             * options:router-port.  Link both directions and record the
             * switch port in the datapath's router_ports array. */
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;
        } else if (op->nbrp && op->nbrp->peer) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1346
/* Writes 'op''s northbound configuration into its southbound Port_Binding
 * row: datapath, type, options, parent_port, tag, and mac.  For switch
 * ports with QoS parameters, also allocates or releases a per-chassis
 * qdisc queue ID via 'chassis_qdisc_queues'. */
static void
ovn_port_update_sbrec(const struct ovn_port *op,
                      struct hmap *chassis_qdisc_queues)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "l3gateway". */
        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
        if (chassis) {
            sbrec_port_binding_set_type(op->sb, "l3gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        /* options:peer names the connected port; "<error>" marks a router
         * port whose peer was never resolved. */
        const char *peer = op->peer ? op->peer->key : "<error>";
        struct smap new;
        smap_init(&new);
        smap_add(&new, "peer", peer);
        if (chassis) {
            smap_add(&new, "l3gateway-chassis", chassis);
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Router ports have no parent/tag/mac columns. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary (non-"router") switch port: copy NB options to SB,
             * managing the qdisc_queue_id option based on whether QoS
             * parameters are configured. */
            uint32_t queue_id = smap_get_int(
                &op->sb->options, "qdisc_queue_id", 0);
            bool has_qos = port_has_qos_params(&op->nbsp->options);
            struct smap options;

            if (op->sb->chassis && has_qos && !queue_id) {
                queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
                                                    op->sb->chassis);
            } else if (!has_qos && queue_id) {
                /* QoS was removed: release the previously allocated queue. */
                free_chassis_queueid(chassis_qdisc_queues,
                                     op->sb->chassis,
                                     queue_id);
                queue_id = 0;
            }

            smap_clone(&options, &op->nbsp->options);
            if (queue_id) {
                smap_add_format(&options,
                                "qdisc_queue_id", "%d", queue_id);
            }
            sbrec_port_binding_set_options(op->sb, &options);
            smap_destroy(&options);
            sbrec_port_binding_set_type(op->sb, op->nbsp->type);
        } else {
            /* Switch port of type "router". */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "l3gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "l3gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get_def(&op->nbsp->options,
                                                   "router-port", "<error>");
            struct smap new;
            smap_init(&new);
            smap_add(&new, "peer", router_port);
            if (chassis) {
                smap_add(&new, "l3gateway-chassis", chassis);
            }

            /* Propagate options:nat-addresses to the SB row only when it
             * parses as a valid address set. */
            const char *nat_addresses = smap_get(&op->nbsp->options,
                                                 "nat-addresses");
            if (nat_addresses) {
                struct lport_addresses laddrs;
                if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(1, 1);
                    VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
                } else {
                    smap_add(&new, "nat-addresses", nat_addresses);
                    destroy_lport_addresses(&laddrs);
                }
            }
            sbrec_port_binding_set_options(op->sb, &new);
            smap_destroy(&new);
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);
    }
}
1445
1446 /* Remove mac_binding entries that refer to logical_ports which are
1447 * deleted. */
1448 static void
1449 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
1450 {
1451 const struct sbrec_mac_binding *b, *n;
1452 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
1453 if (!ovn_port_find(ports, b->logical_port)) {
1454 sbrec_mac_binding_delete(b);
1455 }
1456 }
1457 }
1458
/* Updates the southbound Port_Binding table so that it contains the logical
 * switch ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx, struct hmap *datapaths,
            struct hmap *ports)
{
    struct ovs_list sb_only, nb_only, both;
    struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
    struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);

    /* Partition all ports into SB-only, NB-only, and matched sets. */
    join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
                       &tag_alloc_table, &sb_only, &nb_only, &both);

    struct ovn_port *op, *next;
    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data. Also index the in-use tunnel_keys.
     * For logical ports that are in NB database, do any tag allocation
     * needed. */
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        if (op->nbsp) {
            tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
        }
        ovn_port_update_sbrec(op, &chassis_qdisc_queues);

        /* Track used tunnel keys so new allocations do not collide. */
        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record. */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            /* No free key on this datapath; skip the port for now. */
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(op, &chassis_qdisc_queues);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* MAC bindings need cleaning up only if some port was deleted. */
    bool remove_mac_bindings = false;
    if (!ovs_list_is_empty(&sb_only)) {
        remove_mac_bindings = true;
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        ovs_list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
    if (remove_mac_bindings) {
        cleanup_mac_bindings(ctx, ports);
    }

    tag_alloc_destroy(&tag_alloc_table);
    destroy_chassis_queues(&chassis_qdisc_queues);
}
1525 \f
/* Multicast group keys live in this reserved range, above all regular
 * port tunnel keys. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with its fixed southbound key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Well-known groups, assigned from the top of the multicast key range. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
1539
1540 static bool
1541 multicast_group_equal(const struct multicast_group *a,
1542 const struct multicast_group *b)
1543 {
1544 return !strcmp(a->name, b->name) && a->key == b->key;
1545 }
1546
/* Multicast group entry. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Member ports, as a growable array (see ovn_multicast_add()). */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
1556
1557 static uint32_t
1558 ovn_multicast_hash(const struct ovn_datapath *datapath,
1559 const struct multicast_group *group)
1560 {
1561 return hash_pointer(datapath, group->key);
1562 }
1563
1564 static struct ovn_multicast *
1565 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
1566 const struct multicast_group *group)
1567 {
1568 struct ovn_multicast *mc;
1569
1570 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
1571 ovn_multicast_hash(datapath, group), mcgroups) {
1572 if (mc->datapath == datapath
1573 && multicast_group_equal(mc->group, group)) {
1574 return mc;
1575 }
1576 }
1577 return NULL;
1578 }
1579
1580 static void
1581 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
1582 struct ovn_port *port)
1583 {
1584 struct ovn_datapath *od = port->od;
1585 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
1586 if (!mc) {
1587 mc = xmalloc(sizeof *mc);
1588 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
1589 mc->datapath = od;
1590 mc->group = group;
1591 mc->n_ports = 0;
1592 mc->allocated_ports = 4;
1593 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
1594 }
1595 if (mc->n_ports >= mc->allocated_ports) {
1596 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
1597 sizeof *mc->ports);
1598 }
1599 mc->ports[mc->n_ports++] = port;
1600 }
1601
1602 static void
1603 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
1604 {
1605 if (mc) {
1606 hmap_remove(mcgroups, &mc->hmap_node);
1607 free(mc->ports);
1608 free(mc);
1609 }
1610 }
1611
1612 static void
1613 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
1614 const struct sbrec_multicast_group *sb)
1615 {
1616 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
1617 for (size_t i = 0; i < mc->n_ports; i++) {
1618 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
1619 }
1620 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
1621 free(ports);
1622 }
1623 \f
1624 /* Logical flow generation.
1625 *
1626 * This code generates the Logical_Flow table in the southbound database, as a
1627 * function of most of the northbound database.
1628 */
1629
/* One row of the southbound Logical_Flow table, staged in memory. */
struct ovn_lflow {
    struct hmap_node hmap_node; /* In a map hashed by ovn_lflow_hash(). */

    struct ovn_datapath *od;    /* Datapath the flow belongs to. */
    enum ovn_stage stage;       /* Pipeline stage. */
    uint16_t priority;          /* Flow priority. */
    char *match;                /* Match expression (owned when inserted via
                                 * ovn_lflow_add_at(); freed by
                                 * ovn_lflow_destroy()). */
    char *actions;              /* Actions (same ownership as 'match'). */
    const char *where;          /* Source location that generated the flow
                                 * (not owned). */
};
1640
1641 static size_t
1642 ovn_lflow_hash(const struct ovn_lflow *lflow)
1643 {
1644 size_t hash = uuid_hash(&lflow->od->key);
1645 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
1646 hash = hash_string(lflow->match, hash);
1647 return hash_string(lflow->actions, hash);
1648 }
1649
1650 static bool
1651 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1652 {
1653 return (a->od == b->od
1654 && a->stage == b->stage
1655 && a->priority == b->priority
1656 && !strcmp(a->match, b->match)
1657 && !strcmp(a->actions, b->actions));
1658 }
1659
1660 static void
1661 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
1662 enum ovn_stage stage, uint16_t priority,
1663 char *match, char *actions, const char *where)
1664 {
1665 lflow->od = od;
1666 lflow->stage = stage;
1667 lflow->priority = priority;
1668 lflow->match = match;
1669 lflow->actions = actions;
1670 lflow->where = where;
1671 }
1672
1673 /* Adds a row with the specified contents to the Logical_Flow table. */
1674 static void
1675 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
1676 enum ovn_stage stage, uint16_t priority,
1677 const char *match, const char *actions, const char *where)
1678 {
1679 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
1680
1681 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
1682 ovn_lflow_init(lflow, od, stage, priority,
1683 xstrdup(match), xstrdup(actions), where);
1684 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
1685 }
1686
/* Adds a row with the specified contents to the Logical_Flow table,
 * recording the caller's source location (OVS_SOURCE_LOCATOR) as the
 * flow's 'where' field. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     OVS_SOURCE_LOCATOR)
1691
1692 static struct ovn_lflow *
1693 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
1694 enum ovn_stage stage, uint16_t priority,
1695 const char *match, const char *actions)
1696 {
1697 struct ovn_lflow target;
1698 ovn_lflow_init(&target, od, stage, priority,
1699 CONST_CAST(char *, match), CONST_CAST(char *, actions),
1700 NULL);
1701
1702 struct ovn_lflow *lflow;
1703 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
1704 lflows) {
1705 if (ovn_lflow_equal(lflow, &target)) {
1706 return lflow;
1707 }
1708 }
1709 return NULL;
1710 }
1711
1712 static void
1713 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1714 {
1715 if (lflow) {
1716 hmap_remove(lflows, &lflow->hmap_node);
1717 free(lflow->match);
1718 free(lflow->actions);
1719 free(lflow);
1720 }
1721 }
1722
1723 /* Appends port security constraints on L2 address field 'eth_addr_field'
1724 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
1725 * elements, is the collection of port_security constraints from an
1726 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
1727 static void
1728 build_port_security_l2(const char *eth_addr_field,
1729 struct lport_addresses *ps_addrs,
1730 unsigned int n_ps_addrs,
1731 struct ds *match)
1732 {
1733 if (!n_ps_addrs) {
1734 return;
1735 }
1736
1737 ds_put_format(match, " && %s == {", eth_addr_field);
1738
1739 for (size_t i = 0; i < n_ps_addrs; i++) {
1740 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
1741 }
1742 ds_chomp(match, ' ');
1743 ds_put_cstr(match, "}");
1744 }
1745
/* Appends to 'match' constraints restricting IPv6 neighbor discovery to
 * MAC address 'ea': nd.sll/nd.tll must be 'ea' or all-zeroes, and when
 * 'n_ipv6_addrs' > 0 the nd.target must be one of 'ipv6_addrs' or the
 * link-local address generated from 'ea'. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    /* Two parentheses are deliberately left open here; both exit paths
     * below close them. */
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No IPv6 addresses: no nd.target restriction; just balance the
         * open parentheses. */
        ds_put_cstr(match, "))");
        return;
    }

    /* The link-local address derived from 'ea' is always an acceptable
     * nd.target. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
1776
/* Appends to 'match' an "ip6.src == {...}" (ingress pipeline) or
 * "ip6.dst == {...}" (egress pipeline) constraint allowing the link-local
 * address generated from 'ea', each address in 'ipv6_addrs', and — on
 * egress only — the ff00::/8 multicast range. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
1806
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* An entry with a MAC but no IPs constrains L2/ND fields only. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            /* Allow ARP from this MAC; optionally restrict arp.spa too. */
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Replace the trailing ", " by "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            /* Allow IPv6 ND from this MAC, with nd.sll/nd.tll/nd.target
             * constraints built by build_port_security_ipv6_nd_flow(). */
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP or ND from this port that no priority-90
     * flow above admitted. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
1886
1887 /**
1888 * Build port security constraints on IPv4 and IPv6 src and dst fields
1889 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1890 *
1891 * For each port security of the logical port, following
1892 * logical flows are added
1893 * - If the port security has IPv4 addresses,
1894 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1895 *
1896 * - If the port security has IPv6 addresses,
1897 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1898 *
1899 * - If the port security has IPv4 addresses or IPv6 addresses or both
1900 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
1901 */
1902 static void
1903 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
1904 struct hmap *lflows)
1905 {
1906 char *port_direction;
1907 enum ovn_stage stage;
1908 if (pipeline == P_IN) {
1909 port_direction = "inport";
1910 stage = S_SWITCH_IN_PORT_SEC_IP;
1911 } else {
1912 port_direction = "outport";
1913 stage = S_SWITCH_OUT_PORT_SEC_IP;
1914 }
1915
1916 for (size_t i = 0; i < op->n_ps_addrs; i++) {
1917 struct lport_addresses *ps = &op->ps_addrs[i];
1918
1919 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
1920 continue;
1921 }
1922
1923 if (ps->n_ipv4_addrs) {
1924 struct ds match = DS_EMPTY_INITIALIZER;
1925 if (pipeline == P_IN) {
1926 /* Permit use of the unspecified address for DHCP discovery */
1927 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
1928 ds_put_format(&dhcp_match, "inport == %s"
1929 " && eth.src == %s"
1930 " && ip4.src == 0.0.0.0"
1931 " && ip4.dst == 255.255.255.255"
1932 " && udp.src == 68 && udp.dst == 67",
1933 op->json_key, ps->ea_s);
1934 ovn_lflow_add(lflows, op->od, stage, 90,
1935 ds_cstr(&dhcp_match), "next;");
1936 ds_destroy(&dhcp_match);
1937 ds_put_format(&match, "inport == %s && eth.src == %s"
1938 " && ip4.src == {", op->json_key,
1939 ps->ea_s);
1940 } else {
1941 ds_put_format(&match, "outport == %s && eth.dst == %s"
1942 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
1943 op->json_key, ps->ea_s);
1944 }
1945
1946 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
1947 ovs_be32 mask = ps->ipv4_addrs[j].mask;
1948 /* When the netmask is applied, if the host portion is
1949 * non-zero, the host can only use the specified
1950 * address. If zero, the host is allowed to use any
1951 * address in the subnet.
1952 */
1953 if (ps->ipv4_addrs[j].plen == 32
1954 || ps->ipv4_addrs[j].addr & ~mask) {
1955 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
1956 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
1957 /* Host is also allowed to receive packets to the
1958 * broadcast address in the specified subnet. */
1959 ds_put_format(&match, ", %s",
1960 ps->ipv4_addrs[j].bcast_s);
1961 }
1962 } else {
1963 /* host portion is zero */
1964 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
1965 ps->ipv4_addrs[j].plen);
1966 }
1967 ds_put_cstr(&match, ", ");
1968 }
1969
1970 /* Replace ", " by "}". */
1971 ds_chomp(&match, ' ');
1972 ds_chomp(&match, ',');
1973 ds_put_cstr(&match, "}");
1974 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
1975 ds_destroy(&match);
1976 }
1977
1978 if (ps->n_ipv6_addrs) {
1979 struct ds match = DS_EMPTY_INITIALIZER;
1980 if (pipeline == P_IN) {
1981 /* Permit use of unspecified address for duplicate address
1982 * detection */
1983 struct ds dad_match = DS_EMPTY_INITIALIZER;
1984 ds_put_format(&dad_match, "inport == %s"
1985 " && eth.src == %s"
1986 " && ip6.src == ::"
1987 " && ip6.dst == ff02::/16"
1988 " && icmp6.type == {131, 135, 143}", op->json_key,
1989 ps->ea_s);
1990 ovn_lflow_add(lflows, op->od, stage, 90,
1991 ds_cstr(&dad_match), "next;");
1992 ds_destroy(&dad_match);
1993 }
1994 ds_put_format(&match, "%s == %s && %s == %s",
1995 port_direction, op->json_key,
1996 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
1997 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
1998 ps->ipv6_addrs, ps->n_ipv6_addrs);
1999 ovn_lflow_add(lflows, op->od, stage, 90,
2000 ds_cstr(&match), "next;");
2001 ds_destroy(&match);
2002 }
2003
2004 char *match = xasprintf("%s == %s && %s == %s && ip",
2005 port_direction, op->json_key,
2006 pipeline == P_IN ? "eth.src" : "eth.dst",
2007 ps->ea_s);
2008 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2009 free(match);
2010 }
2011
2012 }
2013
2014 static bool
2015 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2016 {
2017 return !lsp->enabled || *lsp->enabled;
2018 }
2019
2020 static bool
2021 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2022 {
2023 return !lsp->up || *lsp->up;
2024 }
2025
2026 static bool
2027 build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
2028 struct ds *options_action, struct ds *response_action)
2029 {
2030 if (!op->nbsp->dhcpv4_options) {
2031 /* CMS has disabled native DHCPv4 for this lport. */
2032 return false;
2033 }
2034
2035 ovs_be32 host_ip, mask;
2036 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2037 &mask);
2038 if (error || ((offer_ip ^ host_ip) & mask)) {
2039 /* Either
2040 * - cidr defined is invalid or
2041 * - the offer ip of the logical port doesn't belong to the cidr
2042 * defined in the DHCPv4 options.
2043 * */
2044 free(error);
2045 return false;
2046 }
2047
2048 const char *server_ip = smap_get(
2049 &op->nbsp->dhcpv4_options->options, "server_id");
2050 const char *server_mac = smap_get(
2051 &op->nbsp->dhcpv4_options->options, "server_mac");
2052 const char *lease_time = smap_get(
2053 &op->nbsp->dhcpv4_options->options, "lease_time");
2054 const char *router = smap_get(
2055 &op->nbsp->dhcpv4_options->options, "router");
2056
2057 if (!(server_ip && server_mac && lease_time && router)) {
2058 /* "server_id", "server_mac", "lease_time" and "router" should be
2059 * present in the dhcp_options. */
2060 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2061 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2062 op->json_key);
2063 return false;
2064 }
2065
2066 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2067 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2068
2069 /* server_mac is not DHCPv4 option, delete it from the smap. */
2070 smap_remove(&dhcpv4_options, "server_mac");
2071 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2072 smap_add(&dhcpv4_options, "netmask", netmask);
2073 free(netmask);
2074
2075 ds_put_format(options_action,
2076 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2077 IP_FMT", ", IP_ARGS(offer_ip));
2078
2079 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2080 * options on different architectures (big or little endian, SSE4.2) */
2081 const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
2082 for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
2083 const struct smap_node *node = sorted_opts[i];
2084 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2085 }
2086 free(sorted_opts);
2087
2088 ds_chomp(options_action, ' ');
2089 ds_chomp(options_action, ',');
2090 ds_put_cstr(options_action, "); next;");
2091
2092 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2093 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
2094 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2095 "output;",
2096 server_mac, IP_ARGS(offer_ip), server_ip);
2097
2098 smap_destroy(&dhcpv4_options);
2099 return true;
2100 }
2101
2102 static bool
2103 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2104 struct ds *options_action, struct ds *response_action)
2105 {
2106 if (!op->nbsp->dhcpv6_options) {
2107 /* CMS has disabled native DHCPv6 for this lport. */
2108 return false;
2109 }
2110
2111 struct in6_addr host_ip, mask;
2112
2113 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2114 &mask);
2115 if (error) {
2116 free(error);
2117 return false;
2118 }
2119 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2120 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2121 if (!ipv6_mask_is_any(&ip6_mask)) {
2122 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2123 * options.*/
2124 return false;
2125 }
2126
2127 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
2128 /* "server_id" should be the MAC address. */
2129 const char *server_mac = smap_get(options_map, "server_id");
2130 struct eth_addr ea;
2131 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2132 /* "server_id" should be present in the dhcpv6_options. */
2133 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2134 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2135 " for lport %s", op->json_key);
2136 return false;
2137 }
2138
2139 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2140 struct in6_addr lla;
2141 in6_generate_lla(ea, &lla);
2142
2143 char server_ip[INET6_ADDRSTRLEN + 1];
2144 ipv6_string_mapped(server_ip, &lla);
2145
2146 char ia_addr[INET6_ADDRSTRLEN + 1];
2147 ipv6_string_mapped(ia_addr, offer_ip);
2148
2149 ds_put_format(options_action,
2150 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2151
2152 /* Check whether the dhcpv6 options should be configured as stateful.
2153 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2154 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
2155 char ia_addr[INET6_ADDRSTRLEN + 1];
2156 ipv6_string_mapped(ia_addr, offer_ip);
2157
2158 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2159 }
2160
2161 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2162 * options on different architectures (big or little endian, SSE4.2) */
2163 const struct smap_node **sorted_opts = smap_sort(options_map);
2164 for (size_t i = 0; i < smap_count(options_map); i++) {
2165 const struct smap_node *node = sorted_opts[i];
2166 if (strcmp(node->key, "dhcpv6_stateless")) {
2167 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2168 }
2169 }
2170 free(sorted_opts);
2171
2172 ds_chomp(options_action, ' ');
2173 ds_chomp(options_action, ',');
2174 ds_put_cstr(options_action, "); next;");
2175
2176 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2177 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2178 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2179 "output;",
2180 server_mac, server_ip);
2181
2182 return true;
2183 }
2184
2185 static bool
2186 has_stateful_acl(struct ovn_datapath *od)
2187 {
2188 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2189 struct nbrec_acl *acl = od->nbs->acls[i];
2190 if (!strcmp(acl->action, "allow-related")) {
2191 return true;
2192 }
2193 }
2194
2195 return false;
2196 }
2197
2198 static void
2199 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
2200 {
2201 bool has_stateful = has_stateful_acl(od);
2202
2203 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2204 * allowed by default. */
2205 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
2206 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
2207
2208 /* If there are any stateful ACL rules in this datapath, we must
2209 * send all IP packets through the conntrack action, which handles
2210 * defragmentation, in order to match L4 headers. */
2211 if (has_stateful) {
2212 for (size_t i = 0; i < od->n_router_ports; i++) {
2213 struct ovn_port *op = od->router_ports[i];
2214 /* Can't use ct() for router ports. Consider the
2215 * following configuration: lp1(10.0.0.2) on
2216 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2217 * ping from lp1 to lp2, First, the response will go
2218 * through ct() with a zone for lp2 in the ls2 ingress
2219 * pipeline on hostB. That ct zone knows about this
2220 * connection. Next, it goes through ct() with the zone
2221 * for the router port in the egress pipeline of ls2 on
2222 * hostB. This zone does not know about the connection,
2223 * as the icmp request went through the logical router
2224 * on hostA, not hostB. This would only work with
2225 * distributed conntrack state across all chassis. */
2226 struct ds match_in = DS_EMPTY_INITIALIZER;
2227 struct ds match_out = DS_EMPTY_INITIALIZER;
2228
2229 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
2230 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
2231 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2232 ds_cstr(&match_in), "next;");
2233 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2234 ds_cstr(&match_out), "next;");
2235
2236 ds_destroy(&match_in);
2237 ds_destroy(&match_out);
2238 }
2239 /* Ingress and Egress Pre-ACL Table (Priority 110).
2240 *
2241 * Not to do conntrack on ND packets. */
2242 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
2243 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
2244
2245 /* Ingress and Egress Pre-ACL Table (Priority 100).
2246 *
2247 * Regardless of whether the ACL is "from-lport" or "to-lport",
2248 * we need rules in both the ingress and egress table, because
2249 * the return traffic needs to be followed.
2250 *
2251 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2252 * it to conntrack for tracking and defragmentation. */
2253 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
2254 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2255 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
2256 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2257 }
2258 }
2259
2260 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2261 * 'ip_address'. The caller must free() the memory allocated for
2262 * 'ip_address'. */
2263 static void
2264 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
2265 uint16_t *port)
2266 {
2267 char *ip_str, *start, *next;
2268 *ip_address = NULL;
2269 *port = 0;
2270
2271 next = start = xstrdup(key);
2272 ip_str = strsep(&next, ":");
2273 if (!ip_str || !ip_str[0]) {
2274 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2275 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2276 free(start);
2277 return;
2278 }
2279
2280 ovs_be32 ip, mask;
2281 char *error = ip_parse_masked(ip_str, &ip, &mask);
2282 if (error || mask != OVS_BE32_MAX) {
2283 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2284 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2285 free(start);
2286 free(error);
2287 return;
2288 }
2289
2290 int l4_port = 0;
2291 if (next && next[0]) {
2292 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
2293 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2294 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
2295 free(start);
2296 return;
2297 }
2298 }
2299
2300 *port = l4_port;
2301 *ip_address = strdup(ip_str);
2302 free(start);
2303 }
2304
2305 static void
2306 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2307 {
2308 /* Allow all packets to go to next tables by default. */
2309 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2310 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2311
2312 struct sset all_ips = SSET_INITIALIZER(&all_ips);
2313 bool vip_configured = false;
2314 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2315 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
2316 struct smap *vips = &lb->vips;
2317 struct smap_node *node;
2318
2319 SMAP_FOR_EACH (node, vips) {
2320 vip_configured = true;
2321
2322 /* node->key contains IP:port or just IP. */
2323 char *ip_address = NULL;
2324 uint16_t port;
2325 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2326 if (!ip_address) {
2327 continue;
2328 }
2329
2330 if (!sset_contains(&all_ips, ip_address)) {
2331 sset_add(&all_ips, ip_address);
2332 }
2333
2334 free(ip_address);
2335
2336 /* Ignore L4 port information in the key because fragmented packets
2337 * may not have L4 information. The pre-stateful table will send
2338 * the packet through ct() action to de-fragment. In stateful
2339 * table, we will eventually look at L4 information. */
2340 }
2341 }
2342
2343 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2344 * packet to conntrack for defragmentation. */
2345 const char *ip_address;
2346 SSET_FOR_EACH(ip_address, &all_ips) {
2347 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
2348 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
2349 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2350 free(match);
2351 }
2352
2353 sset_destroy(&all_ips);
2354
2355 if (vip_configured) {
2356 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
2357 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2358 }
2359 }
2360
2361 static void
2362 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
2363 {
2364 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
2365 * allowed by default. */
2366 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
2367 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
2368
2369 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
2370 * sent to conntrack for tracking and defragmentation. */
2371 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
2372 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2373 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
2374 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2375 }
2376
/* Builds the logical flows for the ingress and egress ACL stages
 * (S_SWITCH_IN_ACL / S_SWITCH_OUT_ACL) of logical switch datapath 'od' and
 * adds them to 'lflows'.  This covers the default-allow flows, the
 * conntrack bookkeeping flows used when any stateful ("allow-related") ACL
 * exists, one or two flows per configured ACL, and the priority-34000
 * flows that let native DHCP replies from ovn-controller through. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiator's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;"
         *
         * We also check for established connections that have ct_label.blocked
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label.blocked=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port.  */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Do not send ND packets to conntrack. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")
            || !strcmp(acl->action, "allow-related")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiator's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            if (!has_stateful) {
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "next;");
            } else {
                struct ds match = DS_EMPTY_INITIALIZER;

                /* Commit the connection tracking entry if it's a new
                 * connection that matches this ACL.  After this commit,
                 * the reply traffic is allowed by a flow we create at
                 * priority 65535, defined earlier.
                 *
                 * It's also possible that a known connection was marked for
                 * deletion after a policy was deleted, but the policy was
                 * re-added while that connection is still known.  We catch
                 * that case here and un-set ct_label.blocked (which will be done
                 * by ct_commit in the "stateful" stage) to indicate that the
                 * connection should be allowed to resume.
                 */
                ds_put_format(&match, "((ct.new && !ct.est)"
                                      " || (!ct.new && ct.est && !ct.rpl "
                                           "&& ct_label.blocked == 1)) "
                                      "&& (%s)", acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match),
                               REGBIT_CONNTRACK_COMMIT" = 1; next;");

                /* Match on traffic in the request direction for an established
                 * connection tracking entry that has not been marked for
                 * deletion.  There is no need to commit here, so we can just
                 * proceed to the next table.  We use this to ensure that this
                 * connection is still allowed by the currently defined
                 * policy. */
                ds_clear(&match);
                ds_put_format(&match,
                              "!ct.new && ct.est && !ct.rpl"
                              " && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "next;");

                ds_destroy(&match);
            }
        } else if (!strcmp(acl->action, "drop")
                   || !strcmp(acl->action, "reject")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* XXX Need to support "reject", treat it as "drop;" for now. */
            if (!strcmp(acl->action, "reject")) {
                VLOG_INFO("reject is not a supported action");
            }

            /* The implementation of "drop" differs if stateful ACLs are in
             * use for this datapath.  In that case, the actions differ
             * depending on whether the connection was previously committed
             * to the connection tracker with ct_commit. */
            if (has_stateful) {
                /* If the packet is not part of an established connection, then
                 * we can simply drop it. */
                ds_put_format(&match,
                              "(!ct.est || (ct.est && ct_label.blocked == 1)) "
                              "&& (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage, acl->priority +
                              OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");

                /* For an existing connection without ct_label set, we've
                 * encountered a policy change.  ACLs previously allowed
                 * this connection and we committed the connection tracking
                 * entry.  Current policy says that we should drop this
                 * connection.  First, we set bit 0 of ct_label to indicate
                 * that this connection is set for deletion.  By not
                 * specifying "next;", we implicitly drop the packet after
                 * updating conntrack state.  We would normally defer
                 * ct_commit() to the "stateful" stage, but since we're
                 * dropping the packet, we go ahead and do it here. */
                ds_clear(&match);
                ds_put_format(&match,
                              "ct.est && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "ct_commit(ct_label=1/1);");

                ds_destroy(&match);
            } else {
                /* There are no stateful ACLs in use on this datapath,
                 * so a "drop" ACL is simply the "drop" logical flow action
                 * in all cases. */
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "drop;");
                ds_destroy(&match);
            }
        }
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4 options.
     * These replies originate locally, so they must bypass the ACL stage
     * (but still get committed when stateful ACLs are in use). */
    for (size_t i = 0; i < od->nbs->n_ports; i++) {
        if (od->nbs->ports[i]->dhcpv4_options) {
            const char *server_id = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
            const char *lease_time = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
            const char *router = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "router");
            /* Only add the flow when the options are complete, matching the
             * validation done in build_dhcpv4_action(). */
            if (server_id && server_mac && lease_time && router) {
                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions =
                    has_stateful ? "ct_commit; next;" : "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip4.src == %s && udp && udp.src == 67 "
                              "&& udp.dst == 68", od->nbs->ports[i]->name,
                              server_mac, server_id);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }

        if (od->nbs->ports[i]->dhcpv6_options) {
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
            struct eth_addr ea;
            if (server_mac && eth_addr_from_string(server_mac, &ea)) {
                /* Get the link local IP of the DHCPv6 server from the
                 * server MAC. */
                struct in6_addr lla;
                in6_generate_lla(ea, &lla);

                char server_ip[INET6_ADDRSTRLEN + 1];
                ipv6_string_mapped(server_ip, &lla);

                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions = has_stateful ? "ct_commit; next;" :
                    "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip6.src == %s && udp && udp.src == 547 "
                              "&& udp.dst == 546", od->nbs->ports[i]->name,
                              server_mac, server_ip);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }
    }
}
2643
2644 static void
2645 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
2646 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
2647 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
2648
2649 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
2650 struct nbrec_qos *qos = od->nbs->qos_rules[i];
2651 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
2652 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
2653
2654 if (!strcmp(qos->key_action, "dscp")) {
2655 struct ds dscp_action = DS_EMPTY_INITIALIZER;
2656
2657 ds_put_format(&dscp_action, "ip.dscp = %d; next;",
2658 (uint8_t)qos->value_action);
2659 ovn_lflow_add(lflows, od, stage,
2660 qos->priority,
2661 qos->match, ds_cstr(&dscp_action));
2662 ds_destroy(&dscp_action);
2663 }
2664 }
2665 }
2666
2667 static void
2668 build_lb(struct ovn_datapath *od, struct hmap *lflows)
2669 {
2670 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
2671 * default. */
2672 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
2673 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
2674
2675 if (od->nbs->load_balancer) {
2676 /* Ingress and Egress LB Table (Priority 65535).
2677 *
2678 * Send established traffic through conntrack for just NAT. */
2679 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
2680 "ct.est && !ct.rel && !ct.new && !ct.inv",
2681 REGBIT_CONNTRACK_NAT" = 1; next;");
2682 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
2683 "ct.est && !ct.rel && !ct.new && !ct.inv",
2684 REGBIT_CONNTRACK_NAT" = 1; next;");
2685 }
2686 }
2687
2688 static void
2689 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
2690 {
2691 /* Ingress and Egress stateful Table (Priority 0): Packets are
2692 * allowed by default. */
2693 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
2694 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
2695
2696 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
2697 * committed to conntrack. We always set ct_label.blocked to 0 here as
2698 * any packet that makes it this far is part of a connection we
2699 * want to allow to continue. */
2700 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
2701 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
2702 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
2703 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
2704
2705 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
2706 * through nat (without committing).
2707 *
2708 * REGBIT_CONNTRACK_COMMIT is set for new connections and
2709 * REGBIT_CONNTRACK_NAT is set for established connections. So they
2710 * don't overlap.
2711 */
2712 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
2713 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
2714 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
2715 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
2716
2717 /* Load balancing rules for new connections get committed to conntrack
2718 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
2719 * a higher priority rule for load balancing below also commits the
2720 * connection, so it is okay if we do not hit the above match on
2721 * REGBIT_CONNTRACK_COMMIT. */
2722 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2723 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
2724 struct smap *vips = &lb->vips;
2725 struct smap_node *node;
2726
2727 SMAP_FOR_EACH (node, vips) {
2728 uint16_t port = 0;
2729
2730 /* node->key contains IP:port or just IP. */
2731 char *ip_address = NULL;
2732 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2733 if (!ip_address) {
2734 continue;
2735 }
2736
2737 /* New connections in Ingress table. */
2738 char *action = xasprintf("ct_lb(%s);", node->value);
2739 struct ds match = DS_EMPTY_INITIALIZER;
2740 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
2741 if (port) {
2742 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
2743 ds_put_format(&match, " && udp.dst == %d", port);
2744 } else {
2745 ds_put_format(&match, " && tcp.dst == %d", port);
2746 }
2747 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
2748 120, ds_cstr(&match), action);
2749 } else {
2750 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
2751 110, ds_cstr(&match), action);
2752 }
2753
2754 free(ip_address);
2755 ds_destroy(&match);
2756 free(action);
2757 }
2758 }
2759 }
2760
2761 static void
2762 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
2763 struct hmap *lflows, struct hmap *mcgroups)
2764 {
2765 /* This flow table structure is documented in ovn-northd(8), so please
2766 * update ovn-northd.8.xml if you change anything. */
2767
2768 struct ds match = DS_EMPTY_INITIALIZER;
2769 struct ds actions = DS_EMPTY_INITIALIZER;
2770
2771 /* Build pre-ACL and ACL tables for both ingress and egress.
2772 * Ingress tables 3 through 9. Egress tables 0 through 6. */
2773 struct ovn_datapath *od;
2774 HMAP_FOR_EACH (od, key_node, datapaths) {
2775 if (!od->nbs) {
2776 continue;
2777 }
2778
2779 build_pre_acls(od, lflows);
2780 build_pre_lb(od, lflows);
2781 build_pre_stateful(od, lflows);
2782 build_acls(od, lflows);
2783 build_qos(od, lflows);
2784 build_lb(od, lflows);
2785 build_stateful(od, lflows);
2786 }
2787
2788 /* Logical switch ingress table 0: Admission control framework (priority
2789 * 100). */
2790 HMAP_FOR_EACH (od, key_node, datapaths) {
2791 if (!od->nbs) {
2792 continue;
2793 }
2794
2795 /* Logical VLANs not supported. */
2796 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
2797 "drop;");
2798
2799 /* Broadcast/multicast source address is invalid. */
2800 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
2801 "drop;");
2802
2803 /* Port security flows have priority 50 (see below) and will continue
2804 * to the next table if packet source is acceptable. */
2805 }
2806
2807 /* Logical switch ingress table 0: Ingress port security - L2
2808 * (priority 50).
2809 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
2810 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
2811 */
2812 struct ovn_port *op;
2813 HMAP_FOR_EACH (op, key_node, ports) {
2814 if (!op->nbsp) {
2815 continue;
2816 }
2817
2818 if (!lsp_is_enabled(op->nbsp)) {
2819 /* Drop packets from disabled logical ports (since logical flow
2820 * tables are default-drop). */
2821 continue;
2822 }
2823
2824 ds_clear(&match);
2825 ds_clear(&actions);
2826 ds_put_format(&match, "inport == %s", op->json_key);
2827 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
2828 &match);
2829
2830 const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
2831 if (queue_id) {
2832 ds_put_format(&actions, "set_queue(%s); ", queue_id);
2833 }
2834 ds_put_cstr(&actions, "next;");
2835 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
2836 ds_cstr(&match), ds_cstr(&actions));
2837
2838 if (op->nbsp->n_port_security) {
2839 build_port_security_ip(P_IN, op, lflows);
2840 build_port_security_nd(op, lflows);
2841 }
2842 }
2843
2844 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
2845 * (priority 0)*/
2846 HMAP_FOR_EACH (od, key_node, datapaths) {
2847 if (!od->nbs) {
2848 continue;
2849 }
2850
2851 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
2852 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
2853 }
2854
2855 /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
2856 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
2857 * rationale. */
2858 HMAP_FOR_EACH (op, key_node, ports) {
2859 if (!op->nbsp) {
2860 continue;
2861 }
2862
2863 if ((!strcmp(op->nbsp->type, "localnet")) ||
2864 (!strcmp(op->nbsp->type, "vtep"))) {
2865 ds_clear(&match);
2866 ds_put_format(&match, "inport == %s", op->json_key);
2867 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
2868 ds_cstr(&match), "next;");
2869 }
2870 }
2871
2872 /* Ingress table 10: ARP/ND responder, reply for known IPs.
2873 * (priority 50). */
2874 HMAP_FOR_EACH (op, key_node, ports) {
2875 if (!op->nbsp) {
2876 continue;
2877 }
2878
2879 /*
2880 * Add ARP/ND reply flows if either the
2881 * - port is up or
2882 * - port type is router
2883 */
2884 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
2885 continue;
2886 }
2887
2888 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2889 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2890 ds_clear(&match);
2891 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
2892 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
2893 ds_clear(&actions);
2894 ds_put_format(&actions,
2895 "eth.dst = eth.src; "
2896 "eth.src = %s; "
2897 "arp.op = 2; /* ARP reply */ "
2898 "arp.tha = arp.sha; "
2899 "arp.sha = %s; "
2900 "arp.tpa = arp.spa; "
2901 "arp.spa = %s; "
2902 "outport = inport; "
2903 "flags.loopback = 1; "
2904 "output;",
2905 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
2906 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
2907 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
2908 ds_cstr(&match), ds_cstr(&actions));
2909
2910 /* Do not reply to an ARP request from the port that owns the
2911 * address (otherwise a DHCP client that ARPs to check for a
2912 * duplicate address will fail). Instead, forward it the usual
2913 * way.
2914 *
2915 * (Another alternative would be to simply drop the packet. If
2916 * everything is working as it is configured, then this would
2917 * produce equivalent results, since no one should reply to the
2918 * request. But ARPing for one's own IP address is intended to
2919 * detect situations where the network is not working as
2920 * configured, so dropping the request would frustrate that
2921 * intent.) */
2922 ds_put_format(&match, " && inport == %s", op->json_key);
2923 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
2924 ds_cstr(&match), "next;");
2925 }
2926
2927 /* For ND solicitations, we need to listen for both the
2928 * unicast IPv6 address and its all-nodes multicast address,
2929 * but always respond with the unicast IPv6 address. */
2930 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
2931 ds_clear(&match);
2932 ds_put_format(&match,
2933 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
2934 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
2935 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
2936 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
2937
2938 ds_clear(&actions);
2939 ds_put_format(&actions,
2940 "nd_na { "
2941 "eth.src = %s; "
2942 "ip6.src = %s; "
2943 "nd.target = %s; "
2944 "nd.tll = %s; "
2945 "outport = inport; "
2946 "flags.loopback = 1; "
2947 "output; "
2948 "};",
2949 op->lsp_addrs[i].ea_s,
2950 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
2951 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
2952 op->lsp_addrs[i].ea_s);
2953 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
2954 ds_cstr(&match), ds_cstr(&actions));
2955
2956 /* Do not reply to a solicitation from the port that owns the
2957 * address (otherwise DAD detection will fail). */
2958 ds_put_format(&match, " && inport == %s", op->json_key);
2959 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
2960 ds_cstr(&match), "next;");
2961 }
2962 }
2963 }
2964
2965 /* Ingress table 10: ARP/ND responder, by default goto next.
2966 * (priority 0)*/
2967 HMAP_FOR_EACH (od, key_node, datapaths) {
2968 if (!od->nbs) {
2969 continue;
2970 }
2971
2972 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
2973 }
2974
2975 /* Logical switch ingress table 11 and 12: DHCP options and response
2976 * priority 100 flows. */
2977 HMAP_FOR_EACH (op, key_node, ports) {
2978 if (!op->nbsp) {
2979 continue;
2980 }
2981
2982 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
2983 /* Don't add the DHCP flows if the port is not enabled or if the
2984 * port is a router port. */
2985 continue;
2986 }
2987
2988 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
2989 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
2990 */
2991 continue;
2992 }
2993
2994 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2995 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2996 struct ds options_action = DS_EMPTY_INITIALIZER;
2997 struct ds response_action = DS_EMPTY_INITIALIZER;
2998 if (build_dhcpv4_action(
2999 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
3000 &options_action, &response_action)) {
3001 struct ds match = DS_EMPTY_INITIALIZER;
3002 ds_put_format(
3003 &match, "inport == %s && eth.src == %s && "
3004 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
3005 "udp.src == 68 && udp.dst == 67", op->json_key,
3006 op->lsp_addrs[i].ea_s);
3007
3008 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3009 100, ds_cstr(&match),
3010 ds_cstr(&options_action));
3011 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
3012 * put_dhcp_opts action is successful */
3013 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
3014 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
3015 100, ds_cstr(&match),
3016 ds_cstr(&response_action));
3017 ds_destroy(&match);
3018 ds_destroy(&options_action);
3019 ds_destroy(&response_action);
3020 break;
3021 }
3022 }
3023
3024 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3025 struct ds options_action = DS_EMPTY_INITIALIZER;
3026 struct ds response_action = DS_EMPTY_INITIALIZER;
3027 if (build_dhcpv6_action(
3028 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
3029 &options_action, &response_action)) {
3030 struct ds match = DS_EMPTY_INITIALIZER;
3031 ds_put_format(
3032 &match, "inport == %s && eth.src == %s"
3033 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
3034 " udp.dst == 547", op->json_key,
3035 op->lsp_addrs[i].ea_s);
3036
3037 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
3038 ds_cstr(&match), ds_cstr(&options_action));
3039
3040 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
3041 * put_dhcpv6_opts action is successful */
3042 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
3043 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
3044 ds_cstr(&match), ds_cstr(&response_action));
3045 ds_destroy(&match);
3046 ds_destroy(&options_action);
3047 ds_destroy(&response_action);
3048 break;
3049 }
3050 }
3051 }
3052 }
3053
3054 /* Ingress table 11 and 12: DHCP options and response, by default goto next.
3055 * (priority 0). */
3056
3057 HMAP_FOR_EACH (od, key_node, datapaths) {
3058 if (!od->nbs) {
3059 continue;
3060 }
3061
3062 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
3063 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
3064 }
3065
3066 /* Ingress table 13: Destination lookup, broadcast and multicast handling
3067 * (priority 100). */
3068 HMAP_FOR_EACH (op, key_node, ports) {
3069 if (!op->nbsp) {
3070 continue;
3071 }
3072
3073 if (lsp_is_enabled(op->nbsp)) {
3074 ovn_multicast_add(mcgroups, &mc_flood, op);
3075 }
3076 }
3077 HMAP_FOR_EACH (od, key_node, datapaths) {
3078 if (!od->nbs) {
3079 continue;
3080 }
3081
3082 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
3083 "outport = \""MC_FLOOD"\"; output;");
3084 }
3085
3086 /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
3087 HMAP_FOR_EACH (op, key_node, ports) {
3088 if (!op->nbsp) {
3089 continue;
3090 }
3091
3092 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
3093 /* Addresses are owned by the logical port.
3094 * Ethernet address followed by zero or more IPv4
3095 * or IPv6 addresses (or both). */
3096 struct eth_addr mac;
3097 if (ovs_scan(op->nbsp->addresses[i],
3098 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
3099 ds_clear(&match);
3100 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3101 ETH_ADDR_ARGS(mac));
3102
3103 ds_clear(&actions);
3104 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3105 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
3106 ds_cstr(&match), ds_cstr(&actions));
3107 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
3108 if (lsp_is_enabled(op->nbsp)) {
3109 ovn_multicast_add(mcgroups, &mc_unknown, op);
3110 op->od->has_unknown = true;
3111 }
3112 } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
3113 if (!op->nbsp->dynamic_addresses
3114 || !ovs_scan(op->nbsp->dynamic_addresses,
3115 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
3116 continue;
3117 }
3118 ds_clear(&match);
3119 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3120 ETH_ADDR_ARGS(mac));
3121
3122 ds_clear(&actions);
3123 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3124 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
3125 ds_cstr(&match), ds_cstr(&actions));
3126 } else {
3127 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3128
3129 VLOG_INFO_RL(&rl,
3130 "%s: invalid syntax '%s' in addresses column",
3131 op->nbsp->name, op->nbsp->addresses[i]);
3132 }
3133 }
3134 }
3135
3136 /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
3137 HMAP_FOR_EACH (od, key_node, datapaths) {
3138 if (!od->nbs) {
3139 continue;
3140 }
3141
3142 if (od->has_unknown) {
3143 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
3144 "outport = \""MC_UNKNOWN"\"; output;");
3145 }
3146 }
3147
3148 /* Egress tables 6: Egress port security - IP (priority 0)
3149 * Egress table 7: Egress port security L2 - multicast/broadcast
3150 * (priority 100). */
3151 HMAP_FOR_EACH (od, key_node, datapaths) {
3152 if (!od->nbs) {
3153 continue;
3154 }
3155
3156 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
3157 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
3158 "output;");
3159 }
3160
3161 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
3162 * if port security enabled.
3163 *
3164 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
3165 *
3166 * Priority 50 rules implement port security for enabled logical port.
3167 *
3168 * Priority 150 rules drop packets to disabled logical ports, so that they
3169 * don't even receive multicast or broadcast packets. */
3170 HMAP_FOR_EACH (op, key_node, ports) {
3171 if (!op->nbsp) {
3172 continue;
3173 }
3174
3175 ds_clear(&match);
3176 ds_put_format(&match, "outport == %s", op->json_key);
3177 if (lsp_is_enabled(op->nbsp)) {
3178 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
3179 &match);
3180 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
3181 ds_cstr(&match), "output;");
3182 } else {
3183 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
3184 ds_cstr(&match), "drop;");
3185 }
3186
3187 if (op->nbsp->n_port_security) {
3188 build_port_security_ip(P_OUT, op, lflows);
3189 }
3190 }
3191
3192 ds_destroy(&match);
3193 ds_destroy(&actions);
3194 }
3195
3196 static bool
3197 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
3198 {
3199 return !lrport->enabled || *lrport->enabled;
3200 }
3201
3202 /* Returns a string of the IP address of the router port 'op' that
3203 * overlaps with 'ip_s". If one is not found, returns NULL.
3204 *
3205 * The caller must not free the returned string. */
3206 static const char *
3207 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
3208 {
3209 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
3210
3211 if (is_ipv4) {
3212 ovs_be32 ip;
3213
3214 if (!ip_parse(ip_s, &ip)) {
3215 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3216 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
3217 return NULL;
3218 }
3219
3220 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3221 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
3222
3223 if (!((na->network ^ ip) & na->mask)) {
3224 /* There should be only 1 interface that matches the
3225 * supplied IP. Otherwise, it's a configuration error,
3226 * because subnets of a router's interfaces should NOT
3227 * overlap. */
3228 return na->addr_s;
3229 }
3230 }
3231 } else {
3232 struct in6_addr ip6;
3233
3234 if (!ipv6_parse(ip_s, &ip6)) {
3235 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3236 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
3237 return NULL;
3238 }
3239
3240 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3241 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
3242 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
3243 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
3244
3245 if (ipv6_is_zero(&and_addr)) {
3246 /* There should be only 1 interface that matches the
3247 * supplied IP. Otherwise, it's a configuration error,
3248 * because subnets of a router's interfaces should NOT
3249 * overlap. */
3250 return na->addr_s;
3251 }
3252 }
3253 }
3254
3255 return NULL;
3256 }
3257
3258 static void
3259 add_route(struct hmap *lflows, const struct ovn_port *op,
3260 const char *lrp_addr_s, const char *network_s, int plen,
3261 const char *gateway, const char *policy)
3262 {
3263 bool is_ipv4 = strchr(network_s, '.') ? true : false;
3264 struct ds match = DS_EMPTY_INITIALIZER;
3265 const char *dir;
3266 uint16_t priority;
3267
3268 if (policy && !strcmp(policy, "src-ip")) {
3269 dir = "src";
3270 priority = plen * 2;
3271 } else {
3272 dir = "dst";
3273 priority = (plen * 2) + 1;
3274 }
3275
3276 /* IPv6 link-local addresses must be scoped to the local router port. */
3277 if (!is_ipv4) {
3278 struct in6_addr network;
3279 ovs_assert(ipv6_parse(network_s, &network));
3280 if (in6_is_lla(&network)) {
3281 ds_put_format(&match, "inport == %s && ", op->json_key);
3282 }
3283 }
3284 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
3285 network_s, plen);
3286
3287 struct ds actions = DS_EMPTY_INITIALIZER;
3288 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
3289
3290 if (gateway) {
3291 ds_put_cstr(&actions, gateway);
3292 } else {
3293 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
3294 }
3295 ds_put_format(&actions, "; "
3296 "%sreg1 = %s; "
3297 "eth.src = %s; "
3298 "outport = %s; "
3299 "flags.loopback = 1; "
3300 "next;",
3301 is_ipv4 ? "" : "xx",
3302 lrp_addr_s,
3303 op->lrp_networks.ea_s,
3304 op->json_key);
3305
3306 /* The priority here is calculated to implement longest-prefix-match
3307 * routing. */
3308 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
3309 ds_cstr(&match), ds_cstr(&actions));
3310 ds_destroy(&match);
3311 ds_destroy(&actions);
3312 }
3313
3314 static void
3315 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
3316 struct hmap *ports,
3317 const struct nbrec_logical_router_static_route *route)
3318 {
3319 ovs_be32 nexthop;
3320 const char *lrp_addr_s;
3321 unsigned int plen;
3322 bool is_ipv4;
3323
3324 /* Verify that the next hop is an IP address with an all-ones mask. */
3325 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
3326 if (!error) {
3327 if (plen != 32) {
3328 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3329 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3330 return;
3331 }
3332 is_ipv4 = true;
3333 } else {
3334 free(error);
3335
3336 struct in6_addr ip6;
3337 char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
3338 if (!error) {
3339 if (plen != 128) {
3340 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3341 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3342 return;
3343 }
3344 is_ipv4 = false;
3345 } else {
3346 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3347 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
3348 free(error);
3349 return;
3350 }
3351 }
3352
3353 char *prefix_s;
3354 if (is_ipv4) {
3355 ovs_be32 prefix;
3356 /* Verify that ip prefix is a valid IPv4 address. */
3357 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
3358 if (error) {
3359 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3360 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3361 route->ip_prefix);
3362 free(error);
3363 return;
3364 }
3365 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
3366 } else {
3367 /* Verify that ip prefix is a valid IPv6 address. */
3368 struct in6_addr prefix;
3369 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
3370 if (error) {
3371 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3372 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3373 route->ip_prefix);
3374 free(error);
3375 return;
3376 }
3377 struct in6_addr mask = ipv6_create_mask(plen);
3378 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
3379 prefix_s = xmalloc(INET6_ADDRSTRLEN);
3380 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
3381 }
3382
3383 /* Find the outgoing port. */
3384 struct ovn_port *out_port = NULL;
3385 if (route->output_port) {
3386 out_port = ovn_port_find(ports, route->output_port);
3387 if (!out_port) {
3388 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3389 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
3390 route->output_port, route->ip_prefix);
3391 goto free_prefix_s;
3392 }
3393 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3394 } else {
3395 /* output_port is not specified, find the
3396 * router port matching the next hop. */
3397 int i;
3398 for (i = 0; i < od->nbr->n_ports; i++) {
3399 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
3400 out_port = ovn_port_find(ports, lrp->name);
3401 if (!out_port) {
3402 /* This should not happen. */
3403 continue;
3404 }
3405
3406 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3407 if (lrp_addr_s) {
3408 break;
3409 }
3410 }
3411 }
3412
3413 if (!lrp_addr_s) {
3414 /* There is no matched out port. */
3415 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3416 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
3417 route->ip_prefix, route->nexthop);
3418 goto free_prefix_s;
3419 }
3420
3421 char *policy = route->policy ? route->policy : "dst-ip";
3422 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
3423 policy);
3424
3425 free_prefix_s:
3426 free(prefix_s);
3427 }
3428
3429 static void
3430 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
3431 {
3432 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
3433 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
3434 return;
3435 }
3436
3437 ds_put_cstr(ds, "{");
3438 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3439 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
3440 if (add_bcast) {
3441 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
3442 }
3443 }
3444 ds_chomp(ds, ' ');
3445 ds_chomp(ds, ',');
3446 ds_put_cstr(ds, "}");
3447 }
3448
3449 static void
3450 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
3451 {
3452 if (op->lrp_networks.n_ipv6_addrs == 1) {
3453 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
3454 return;
3455 }
3456
3457 ds_put_cstr(ds, "{");
3458 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3459 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
3460 }
3461 ds_chomp(ds, ' ');
3462 ds_chomp(ds, ',');
3463 ds_put_cstr(ds, "}");
3464 }
3465
3466 static const char *
3467 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
3468 {
3469 char *key = xasprintf("%s_force_snat_ip", key_type);
3470 const char *ip_address = smap_get(&od->nbr->options, key);
3471 free(key);
3472
3473 if (ip_address) {
3474 ovs_be32 mask;
3475 char *error = ip_parse_masked(ip_address, ip, &mask);
3476 if (error || mask != OVS_BE32_MAX) {
3477 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3478 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
3479 ip_address, UUID_ARGS(&od->key));
3480 free(error);
3481 *ip = 0;
3482 return NULL;
3483 }
3484 return ip_address;
3485 }
3486
3487 *ip = 0;
3488 return NULL;
3489 }
3490
3491 static void
3492 add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
3493 struct ds *match, struct ds *actions, int priority,
3494 const char *lb_force_snat_ip)
3495 {
3496 /* A match and actions for new connections. */
3497 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
3498 if (lb_force_snat_ip) {
3499 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
3500 ds_cstr(actions));
3501 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
3502 new_actions);
3503 free(new_actions);
3504 } else {
3505 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
3506 ds_cstr(actions));
3507 }
3508
3509 /* A match and actions for established connections. */
3510 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
3511 if (lb_force_snat_ip) {
3512 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
3513 "flags.force_snat_for_lb = 1; ct_dnat;");
3514 } else {
3515 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
3516 "ct_dnat;");
3517 }
3518
3519 free(new_match);
3520 free(est_match);
3521 }
3522
3523 static void
3524 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
3525 struct hmap *lflows)
3526 {
3527 /* This flow table structure is documented in ovn-northd(8), so please
3528 * update ovn-northd.8.xml if you change anything. */
3529
3530 struct ds match = DS_EMPTY_INITIALIZER;
3531 struct ds actions = DS_EMPTY_INITIALIZER;
3532
3533 /* Logical router ingress table 0: Admission control framework. */
3534 struct ovn_datapath *od;
3535 HMAP_FOR_EACH (od, key_node, datapaths) {
3536 if (!od->nbr) {
3537 continue;
3538 }
3539
3540 /* Logical VLANs not supported.
3541 * Broadcast/multicast source address is invalid. */
3542 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
3543 "vlan.present || eth.src[40]", "drop;");
3544 }
3545
3546 /* Logical router ingress table 0: match (priority 50). */
3547 struct ovn_port *op;
3548 HMAP_FOR_EACH (op, key_node, ports) {
3549 if (!op->nbrp) {
3550 continue;
3551 }
3552
3553 if (!lrport_is_enabled(op->nbrp)) {
3554 /* Drop packets from disabled logical ports (since logical flow
3555 * tables are default-drop). */
3556 continue;
3557 }
3558
3559 ds_clear(&match);
3560 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
3561 op->lrp_networks.ea_s, op->json_key);
3562 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
3563 ds_cstr(&match), "next;");
3564 }
3565
3566 /* Logical router ingress table 1: IP Input. */
3567 HMAP_FOR_EACH (od, key_node, datapaths) {
3568 if (!od->nbr) {
3569 continue;
3570 }
3571
3572 /* L3 admission control: drop multicast and broadcast source, localhost
3573 * source or destination, and zero network source or destination
3574 * (priority 100). */
3575 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
3576 "ip4.mcast || "
3577 "ip4.src == 255.255.255.255 || "
3578 "ip4.src == 127.0.0.0/8 || "
3579 "ip4.dst == 127.0.0.0/8 || "
3580 "ip4.src == 0.0.0.0/8 || "
3581 "ip4.dst == 0.0.0.0/8",
3582 "drop;");
3583
3584 /* ARP reply handling. Use ARP replies to populate the logical
3585 * router's ARP table. */
3586 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
3587 "put_arp(inport, arp.spa, arp.sha);");
3588
3589 /* Drop Ethernet local broadcast. By definition this traffic should
3590 * not be forwarded.*/
3591 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
3592 "eth.bcast", "drop;");
3593
3594 /* TTL discard.
3595 *
3596 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
3597 ds_clear(&match);
3598 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
3599 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
3600 ds_cstr(&match), "drop;");
3601
3602 /* ND advertisement handling. Use advertisements to populate
3603 * the logical router's ARP/ND table. */
3604 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
3605 "put_nd(inport, nd.target, nd.tll);");
3606
3607 /* Learn from neighbor solicitations that were not directed at
3608 * us. (A priority-90 flow will respond to requests to us and
3609 * learn the sender's mac address.) */
3610 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
3611 "put_nd(inport, ip6.src, nd.sll);");
3612
3613 /* Pass other traffic not already handled to the next table for
3614 * routing. */
3615 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
3616 }
3617
3618 /* Logical router ingress table 1: IP Input for IPv4. */
3619 HMAP_FOR_EACH (op, key_node, ports) {
3620 if (!op->nbrp) {
3621 continue;
3622 }
3623
3624
3625 if (op->lrp_networks.n_ipv4_addrs) {
3626 /* L3 admission control: drop packets that originate from an
3627 * IPv4 address owned by the router or a broadcast address
3628 * known to the router (priority 100). */
3629 ds_clear(&match);
3630 ds_put_cstr(&match, "ip4.src == ");
3631 op_put_v4_networks(&match, op, true);
3632 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3633 ds_cstr(&match), "drop;");
3634
3635 /* ICMP echo reply. These flows reply to ICMP echo requests
3636 * received for the router's IP address. Since packets only
3637 * get here as part of the logical router datapath, the inport
3638 * (i.e. the incoming locally attached net) does not matter.
3639 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
3640 ds_clear(&match);
3641 ds_put_cstr(&match, "ip4.dst == ");
3642 op_put_v4_networks(&match, op, false);
3643 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
3644
3645 ds_clear(&actions);
3646 ds_put_format(&actions,
3647 "ip4.dst <-> ip4.src; "
3648 "ip.ttl = 255; "
3649 "icmp4.type = 0; "
3650 "flags.loopback = 1; "
3651 "next; ");
3652 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3653 ds_cstr(&match), ds_cstr(&actions));
3654 }
3655
3656 /* ARP reply. These flows reply to ARP requests for the router's own
3657 * IP address. */
3658 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3659 ds_clear(&match);
3660 ds_put_format(&match,
3661 "inport == %s && arp.tpa == %s && arp.op == 1",
3662 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
3663
3664 ds_clear(&actions);
3665 ds_put_format(&actions,
3666 "eth.dst = eth.src; "
3667 "eth.src = %s; "
3668 "arp.op = 2; /* ARP reply */ "
3669 "arp.tha = arp.sha; "
3670 "arp.sha = %s; "
3671 "arp.tpa = arp.spa; "
3672 "arp.spa = %s; "
3673 "outport = %s; "
3674 "flags.loopback = 1; "
3675 "output;",
3676 op->lrp_networks.ea_s,
3677 op->lrp_networks.ea_s,
3678 op->lrp_networks.ipv4_addrs[i].addr_s,
3679 op->json_key);
3680 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3681 ds_cstr(&match), ds_cstr(&actions));
3682 }
3683
3684 /* A set to hold all load-balancer vips that need ARP responses. */
3685 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3686
3687 for (int i = 0; i < op->od->nbr->n_load_balancer; i++) {
3688 struct nbrec_load_balancer *lb = op->od->nbr->load_balancer[i];
3689 struct smap *vips = &lb->vips;
3690 struct smap_node *node;
3691
3692 SMAP_FOR_EACH (node, vips) {
3693 /* node->key contains IP:port or just IP. */
3694 char *ip_address = NULL;
3695 uint16_t port;
3696
3697 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3698 if (!ip_address) {
3699 continue;
3700 }
3701
3702 if (!sset_contains(&all_ips, ip_address)) {
3703 sset_add(&all_ips, ip_address);
3704 }
3705
3706 free(ip_address);
3707 }
3708 }
3709
3710 const char *ip_address;
3711 SSET_FOR_EACH(ip_address, &all_ips) {
3712 ovs_be32 ip;
3713 if (!ip_parse(ip_address, &ip) || !ip) {
3714 continue;
3715 }
3716
3717 ds_clear(&match);
3718 ds_put_format(&match,
3719 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3720 op->json_key, IP_ARGS(ip));
3721
3722 ds_clear(&actions);
3723 ds_put_format(&actions,
3724 "eth.dst = eth.src; "
3725 "eth.src = %s; "
3726 "arp.op = 2; /* ARP reply */ "
3727 "arp.tha = arp.sha; "
3728 "arp.sha = %s; "
3729 "arp.tpa = arp.spa; "
3730 "arp.spa = "IP_FMT"; "
3731 "outport = %s; "
3732 "flags.loopback = 1; "
3733 "output;",
3734 op->lrp_networks.ea_s,
3735 op->lrp_networks.ea_s,
3736 IP_ARGS(ip),
3737 op->json_key);
3738 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3739 ds_cstr(&match), ds_cstr(&actions));
3740 }
3741
3742 sset_destroy(&all_ips);
3743
3744 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
3745 * LBed traffic respectively to be SNATed. In addition, there can be
3746 * a number of SNAT rules in the NAT table. */
3747 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
3748 (op->od->nbr->n_nat + 2));
3749 size_t n_snat_ips = 0;
3750
3751 ovs_be32 snat_ip;
3752 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
3753 &snat_ip);
3754 if (dnat_force_snat_ip) {
3755 snat_ips[n_snat_ips++] = snat_ip;
3756 }
3757
3758 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
3759 &snat_ip);
3760 if (lb_force_snat_ip) {
3761 snat_ips[n_snat_ips++] = snat_ip;
3762 }
3763
3764 for (int i = 0; i < op->od->nbr->n_nat; i++) {
3765 const struct nbrec_nat *nat;
3766
3767 nat = op->od->nbr->nat[i];
3768
3769 ovs_be32 ip;
3770 if (!ip_parse(nat->external_ip, &ip) || !ip) {
3771 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3772 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
3773 "for router %s", nat->external_ip, op->key);
3774 continue;
3775 }
3776
3777 if (!strcmp(nat->type, "snat")) {
3778 snat_ips[n_snat_ips++] = ip;
3779 continue;
3780 }
3781
3782 /* ARP handling for external IP addresses.
3783 *
3784 * DNAT IP addresses are external IP addresses that need ARP
3785 * handling. */
3786 ds_clear(&match);
3787 ds_put_format(&match,
3788 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3789 op->json_key, IP_ARGS(ip));
3790
3791 ds_clear(&actions);
3792 ds_put_format(&actions,
3793 "eth.dst = eth.src; "
3794 "eth.src = %s; "
3795 "arp.op = 2; /* ARP reply */ "
3796 "arp.tha = arp.sha; "
3797 "arp.sha = %s; "
3798 "arp.tpa = arp.spa; "
3799 "arp.spa = "IP_FMT"; "
3800 "outport = %s; "
3801 "flags.loopback = 1; "
3802 "output;",
3803 op->lrp_networks.ea_s,
3804 op->lrp_networks.ea_s,
3805 IP_ARGS(ip),
3806 op->json_key);
3807 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3808 ds_cstr(&match), ds_cstr(&actions));
3809 }
3810
3811 ds_clear(&match);
3812 ds_put_cstr(&match, "ip4.dst == {");
3813 bool has_drop_ips = false;
3814 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3815 bool snat_ip_is_router_ip = false;
3816 for (int j = 0; j < n_snat_ips; j++) {
3817 /* Packets to SNAT IPs should not be dropped. */
3818 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
3819 snat_ip_is_router_ip = true;
3820 break;
3821 }
3822 }
3823 if (snat_ip_is_router_ip) {
3824 continue;
3825 }
3826 ds_put_format(&match, "%s, ",
3827 op->lrp_networks.ipv4_addrs[i].addr_s);
3828 has_drop_ips = true;
3829 }
3830 ds_chomp(&match, ' ');
3831 ds_chomp(&match, ',');
3832 ds_put_cstr(&match, "}");
3833
3834 if (has_drop_ips) {
3835 /* Drop IP traffic to this router. */
3836 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3837 ds_cstr(&match), "drop;");
3838 }
3839
3840 free(snat_ips);
3841 }
3842
3843 /* Logical router ingress table 1: IP Input for IPv6. */
3844 HMAP_FOR_EACH (op, key_node, ports) {
3845 if (!op->nbrp) {
3846 continue;
3847 }
3848
3849 if (op->lrp_networks.n_ipv6_addrs) {
3850 /* L3 admission control: drop packets that originate from an
3851 * IPv6 address owned by the router (priority 100). */
3852 ds_clear(&match);
3853 ds_put_cstr(&match, "ip6.src == ");
3854 op_put_v6_networks(&match, op);
3855 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3856 ds_cstr(&match), "drop;");
3857
3858 /* ICMPv6 echo reply. These flows reply to echo requests
3859 * received for the router's IP address. */
3860 ds_clear(&match);
3861 ds_put_cstr(&match, "ip6.dst == ");
3862 op_put_v6_networks(&match, op);
3863 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
3864
3865 ds_clear(&actions);
3866 ds_put_cstr(&actions,
3867 "ip6.dst <-> ip6.src; "
3868 "ip.ttl = 255; "
3869 "icmp6.type = 129; "
3870 "flags.loopback = 1; "
3871 "next; ");
3872 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3873 ds_cstr(&match), ds_cstr(&actions));
3874
3875 /* Drop IPv6 traffic to this router. */
3876 ds_clear(&match);
3877 ds_put_cstr(&match, "ip6.dst == ");
3878 op_put_v6_networks(&match, op);
3879 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3880 ds_cstr(&match), "drop;");
3881 }
3882
3883 /* ND reply. These flows reply to ND solicitations for the
3884 * router's own IP address. */
3885 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3886 ds_clear(&match);
3887 ds_put_format(&match,
3888 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
3889 "&& nd.target == %s",
3890 op->json_key,
3891 op->lrp_networks.ipv6_addrs[i].addr_s,
3892 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
3893 op->lrp_networks.ipv6_addrs[i].addr_s);
3894
3895 ds_clear(&actions);
3896 ds_put_format(&actions,
3897 "put_nd(inport, ip6.src, nd.sll); "
3898 "nd_na { "
3899 "eth.src = %s; "
3900 "ip6.src = %s; "
3901 "nd.target = %s; "
3902 "nd.tll = %s; "
3903 "outport = inport; "
3904 "flags.loopback = 1; "
3905 "output; "
3906 "};",
3907 op->lrp_networks.ea_s,
3908 op->lrp_networks.ipv6_addrs[i].addr_s,
3909 op->lrp_networks.ipv6_addrs[i].addr_s,
3910 op->lrp_networks.ea_s);
3911 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3912 ds_cstr(&match), ds_cstr(&actions));
3913 }
3914 }
3915
3916 /* NAT, Defrag and load balancing in Gateway routers. */
3917 HMAP_FOR_EACH (od, key_node, datapaths) {
3918 if (!od->nbr) {
3919 continue;
3920 }
3921
3922 /* Packets are allowed by default. */
3923 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
3924 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
3925 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
3926 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
3927
3928 /* NAT rules, packet defrag and load balancing are only valid on
3929 * Gateway routers. */
3930 if (!smap_get(&od->nbr->options, "chassis")) {
3931 continue;
3932 }
3933
3934 ovs_be32 snat_ip;
3935 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
3936 &snat_ip);
3937 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
3938 &snat_ip);
3939
3940 /* A set to hold all ips that need defragmentation and tracking. */
3941 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3942
3943 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
3944 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
3945 struct smap *vips = &lb->vips;
3946 struct smap_node *node;
3947
3948 SMAP_FOR_EACH (node, vips) {
3949 uint16_t port = 0;
3950
3951 /* node->key contains IP:port or just IP. */
3952 char *ip_address = NULL;
3953 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3954 if (!ip_address) {
3955 continue;
3956 }
3957
3958 if (!sset_contains(&all_ips, ip_address)) {
3959 sset_add(&all_ips, ip_address);
3960 }
3961
3962 /* Higher priority rules are added for load-balancing in DNAT
3963 * table. For every match (on a VIP[:port]), we add two flows
3964 * via add_router_lb_flow(). One flow is for specific matching
3965 * on ct.new with an action of "ct_lb($targets);". The other
3966 * flow is for ct.est with an action of "ct_dnat;". */
3967 ds_clear(&actions);
3968 ds_put_format(&actions, "ct_lb(%s);", node->value);
3969
3970 ds_clear(&match);
3971 ds_put_format(&match, "ip && ip4.dst == %s",
3972 ip_address);
3973 free(ip_address);
3974
3975 if (port) {
3976 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
3977 ds_put_format(&match, " && udp && udp.dst == %d",
3978 port);
3979 } else {
3980 ds_put_format(&match, " && tcp && tcp.dst == %d",
3981 port);
3982 }
3983 add_router_lb_flow(lflows, od, &match, &actions, 120,
3984 lb_force_snat_ip);
3985 } else {
3986 add_router_lb_flow(lflows, od, &match, &actions, 110,
3987 lb_force_snat_ip);
3988 }
3989 }
3990 }
3991
3992 /* If there are any load balancing rules, we should send the
3993 * packet to conntrack for defragmentation and tracking. This helps
3994 * with two things.
3995 *
3996 * 1. With tracking, we can send only new connections to pick a
3997 * DNAT ip address from a group.
3998 * 2. If there are L4 ports in load balancing rules, we need the
3999 * defragmentation to match on L4 ports. */
4000 const char *ip_address;
4001 SSET_FOR_EACH(ip_address, &all_ips) {
4002 ds_clear(&match);
4003 ds_put_format(&match, "ip && ip4.dst == %s", ip_address);
4004 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
4005 100, ds_cstr(&match), "ct_next;");
4006 }
4007
4008 sset_destroy(&all_ips);
4009
4010 for (int i = 0; i < od->nbr->n_nat; i++) {
4011 const struct nbrec_nat *nat;
4012
4013 nat = od->nbr->nat[i];
4014
4015 ovs_be32 ip, mask;
4016
4017 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
4018 if (error || mask != OVS_BE32_MAX) {
4019 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4020 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
4021 nat->external_ip);
4022 free(error);
4023 continue;
4024 }
4025
4026 /* Check the validity of nat->logical_ip. 'logical_ip' can
4027 * be a subnet when the type is "snat". */
4028 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
4029 if (!strcmp(nat->type, "snat")) {
4030 if (error) {
4031 static struct vlog_rate_limit rl =
4032 VLOG_RATE_LIMIT_INIT(5, 1);
4033 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
4034 "in router "UUID_FMT"",
4035 nat->logical_ip, UUID_ARGS(&od->key));
4036 free(error);
4037 continue;
4038 }
4039 } else {
4040 if (error || mask != OVS_BE32_MAX) {
4041 static struct vlog_rate_limit rl =
4042 VLOG_RATE_LIMIT_INIT(5, 1);
4043 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
4044 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
4045 free(error);
4046 continue;
4047 }
4048 }
4049
4050 /* Ingress UNSNAT table: It is for already established connections'
4051 * reverse traffic. i.e., SNAT has already been done in egress
4052 * pipeline and now the packet has entered the ingress pipeline as
4053 * part of a reply. We undo the SNAT here.
4054 *
4055 * Undoing SNAT has to happen before DNAT processing. This is
4056 * because when the packet was DNATed in ingress pipeline, it did
4057 * not know about the possibility of eventual additional SNAT in
4058 * egress pipeline. */
4059 if (!strcmp(nat->type, "snat")
4060 || !strcmp(nat->type, "dnat_and_snat")) {
4061 ds_clear(&match);
4062 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
4063 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
4064 ds_cstr(&match), "ct_snat; next;");
4065 }
4066
4067 /* Ingress DNAT table: Packets enter the pipeline with destination
4068 * IP address that needs to be DNATted from a external IP address
4069 * to a logical IP address. */
4070 if (!strcmp(nat->type, "dnat")
4071 || !strcmp(nat->type, "dnat_and_snat")) {
4072 /* Packet when it goes from the initiator to destination.
4073 * We need to zero the inport because the router can
4074 * send the packet back through the same interface. */
4075 ds_clear(&match);
4076 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
4077 ds_clear(&actions);
4078 if (dnat_force_snat_ip) {
4079 /* Indicate to the future tables that a DNAT has taken
4080 * place and a force SNAT needs to be done in the Egress
4081 * SNAT table. */
4082 ds_put_format(&actions, "flags.force_snat_for_dnat = 1; ");
4083 }
4084 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
4085 nat->logical_ip);
4086 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
4087 ds_cstr(&match), ds_cstr(&actions));
4088 }
4089
4090 /* Egress SNAT table: Packets enter the egress pipeline with
4091 * source ip address that needs to be SNATted to a external ip
4092 * address. */
4093 if (!strcmp(nat->type, "snat")
4094 || !strcmp(nat->type, "dnat_and_snat")) {
4095 ds_clear(&match);
4096 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
4097 ds_clear(&actions);
4098 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
4099
4100 /* The priority here is calculated such that the
4101 * nat->logical_ip with the longest mask gets a higher
4102 * priority. */
4103 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
4104 count_1bits(ntohl(mask)) + 1,
4105 ds_cstr(&match), ds_cstr(&actions));
4106 }
4107 }
4108
4109 /* Handle force SNAT options set in the gateway router. */
4110 if (dnat_force_snat_ip) {
4111 /* If a packet with destination IP address as that of the
4112 * gateway router (as set in options:dnat_force_snat_ip) is seen,
4113 * UNSNAT it. */
4114 ds_clear(&match);
4115 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
4116 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
4117 ds_cstr(&match), "ct_snat; next;");
4118
4119 /* Higher priority rules to force SNAT with the IP addresses
4120 * configured in the Gateway router. This only takes effect
4121 * when the packet has already been DNATed once. */
4122 ds_clear(&match);
4123 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
4124 ds_clear(&actions);
4125 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
4126 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
4127 ds_cstr(&match), ds_cstr(&actions));
4128 }
4129 if (lb_force_snat_ip) {
4130 /* If a packet with destination IP address as that of the
4131 * gateway router (as set in options:lb_force_snat_ip) is seen,
4132 * UNSNAT it. */
4133 ds_clear(&match);
4134 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
4135 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
4136 ds_cstr(&match), "ct_snat; next;");
4137
4138 /* Load balanced traffic will have flags.force_snat_for_lb set.
4139 * Force SNAT it. */
4140 ds_clear(&match);
4141 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
4142 ds_clear(&actions);
4143 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
4144 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
4145 ds_cstr(&match), ds_cstr(&actions));
4146 }
4147
4148 /* Re-circulate every packet through the DNAT zone.
4149 * This helps with two things.
4150 *
4151 * 1. Any packet that needs to be unDNATed in the reverse
4152 * direction gets unDNATed. Ideally this could be done in
4153 * the egress pipeline. But since the gateway router
4154 * does not have any feature that depends on the source
4155 * ip address being external IP address for IP routing,
4156 * we can do it here, saving a future re-circulation.
4157 *
4158 * 2. Any packet that was sent through SNAT zone in the
4159 * previous table automatically gets re-circulated to get
4160 * back the new destination IP address that is needed for
4161 * routing in the openflow pipeline. */
4162 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
4163 "ip", "flags.loopback = 1; ct_dnat;");
4164 }
4165
4166 /* Logical router ingress table 5: IP Routing.
4167 *
4168 * A packet that arrives at this table is an IP packet that should be
4169 * routed to the address in 'ip[46].dst'. This table sets outport to
4170 * the correct output port, eth.src to the output port's MAC
4171 * address, and '[xx]reg0' to the next-hop IP address (leaving
4172 * 'ip[46].dst', the packet’s final destination, unchanged), and
4173 * advances to the next table for ARP/ND resolution. */
4174 HMAP_FOR_EACH (op, key_node, ports) {
4175 if (!op->nbrp) {
4176 continue;
4177 }
4178
4179 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4180 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
4181 op->lrp_networks.ipv4_addrs[i].network_s,
4182 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
4183 }
4184
4185 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4186 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
4187 op->lrp_networks.ipv6_addrs[i].network_s,
4188 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
4189 }
4190 }
4191
4192 /* Convert the static routes to flows. */
4193 HMAP_FOR_EACH (od, key_node, datapaths) {
4194 if (!od->nbr) {
4195 continue;
4196 }
4197
4198 for (int i = 0; i < od->nbr->n_static_routes; i++) {
4199 const struct nbrec_logical_router_static_route *route;
4200
4201 route = od->nbr->static_routes[i];
4202 build_static_route_flow(lflows, od, ports, route);
4203 }
4204 }
4205
4206 /* XXX destination unreachable */
4207
4208 /* Local router ingress table 6: ARP Resolution.
4209 *
4210 * Any packet that reaches this table is an IP packet whose next-hop IP
4211 * address is in reg0. (ip4.dst is the final destination.) This table
4212 * resolves the IP address in reg0 into an output port in outport and an
4213 * Ethernet address in eth.dst. */
4214 HMAP_FOR_EACH (op, key_node, ports) {
4215 if (op->nbrp) {
4216 /* This is a logical router port. If next-hop IP address in
4217 * '[xx]reg0' matches IP address of this router port, then
4218 * the packet is intended to eventually be sent to this
4219 * logical port. Set the destination mac address using this
4220 * port's mac address.
4221 *
4222 * The packet is still in peer's logical pipeline. So the match
4223 * should be on peer's outport. */
4224 if (op->peer && op->nbrp->peer) {
4225 if (op->lrp_networks.n_ipv4_addrs) {
4226 ds_clear(&match);
4227 ds_put_format(&match, "outport == %s && reg0 == ",
4228 op->peer->json_key);
4229 op_put_v4_networks(&match, op, false);
4230
4231 ds_clear(&actions);
4232 ds_put_format(&actions, "eth.dst = %s; next;",
4233 op->lrp_networks.ea_s);
4234 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4235 100, ds_cstr(&match), ds_cstr(&actions));
4236 }
4237
4238 if (op->lrp_networks.n_ipv6_addrs) {
4239 ds_clear(&match);
4240 ds_put_format(&match, "outport == %s && xxreg0 == ",
4241 op->peer->json_key);
4242 op_put_v6_networks(&match, op);
4243
4244 ds_clear(&actions);
4245 ds_put_format(&actions, "eth.dst = %s; next;",
4246 op->lrp_networks.ea_s);
4247 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4248 100, ds_cstr(&match), ds_cstr(&actions));
4249 }
4250 }
4251 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
4252 /* This is a logical switch port that backs a VM or a container.
4253 * Extract its addresses. For each of the address, go through all
4254 * the router ports attached to the switch (to which this port
4255 * connects) and if the address in question is reachable from the
4256 * router port, add an ARP/ND entry in that router's pipeline. */
4257
4258 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4259 const char *ea_s = op->lsp_addrs[i].ea_s;
4260 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4261 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
4262 for (size_t k = 0; k < op->od->n_router_ports; k++) {
4263 /* Get the Logical_Router_Port that the
4264 * Logical_Switch_Port is connected to, as
4265 * 'peer'. */
4266 const char *peer_name = smap_get(
4267 &op->od->router_ports[k]->nbsp->options,
4268 "router-port");
4269 if (!peer_name) {
4270 continue;
4271 }
4272
4273 struct ovn_port *peer = ovn_port_find(ports, peer_name);
4274 if (!peer || !peer->nbrp) {
4275 continue;
4276 }
4277
4278 if (!find_lrp_member_ip(peer, ip_s)) {
4279 continue;
4280 }
4281
4282 ds_clear(&match);
4283 ds_put_format(&match, "outport == %s && reg0 == %s",
4284 peer->json_key, ip_s);
4285
4286 ds_clear(&actions);
4287 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
4288 ovn_lflow_add(lflows, peer->od,
4289 S_ROUTER_IN_ARP_RESOLVE, 100,
4290 ds_cstr(&match), ds_cstr(&actions));
4291 }
4292 }
4293
4294 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4295 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
4296 for (size_t k = 0; k < op->od->n_router_ports; k++) {
4297 /* Get the Logical_Router_Port that the
4298 * Logical_Switch_Port is connected to, as
4299 * 'peer'. */
4300 const char *peer_name = smap_get(
4301 &op->od->router_ports[k]->nbsp->options,
4302 "router-port");
4303 if (!peer_name) {
4304 continue;
4305 }
4306
4307 struct ovn_port *peer = ovn_port_find(ports, peer_name);
4308 if (!peer || !peer->nbrp) {
4309 continue;
4310 }
4311
4312 if (!find_lrp_member_ip(peer, ip_s)) {
4313 continue;
4314 }
4315
4316 ds_clear(&match);
4317 ds_put_format(&match, "outport == %s && xxreg0 == %s",
4318 peer->json_key, ip_s);
4319
4320 ds_clear(&actions);
4321 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
4322 ovn_lflow_add(lflows, peer->od,
4323 S_ROUTER_IN_ARP_RESOLVE, 100,
4324 ds_cstr(&match), ds_cstr(&actions));
4325 }
4326 }
4327 }
4328 } else if (!strcmp(op->nbsp->type, "router")) {
4329 /* This is a logical switch port that connects to a router. */
4330
4331 /* The peer of this switch port is the router port for which
4332 * we need to add logical flows such that it can resolve
4333 * ARP entries for all the other router ports connected to
4334 * the switch in question. */
4335
4336 const char *peer_name = smap_get(&op->nbsp->options,
4337 "router-port");
4338 if (!peer_name) {
4339 continue;
4340 }
4341
4342 struct ovn_port *peer = ovn_port_find(ports, peer_name);
4343 if (!peer || !peer->nbrp) {
4344 continue;
4345 }
4346
4347 for (size_t i = 0; i < op->od->n_router_ports; i++) {
4348 const char *router_port_name = smap_get(
4349 &op->od->router_ports[i]->nbsp->options,
4350 "router-port");
4351 struct ovn_port *router_port = ovn_port_find(ports,
4352 router_port_name);
4353 if (!router_port || !router_port->nbrp) {
4354 continue;
4355 }
4356
4357 /* Skip the router port under consideration. */
4358 if (router_port == peer) {
4359 continue;
4360 }
4361
4362 if (router_port->lrp_networks.n_ipv4_addrs) {
4363 ds_clear(&match);
4364 ds_put_format(&match, "outport == %s && reg0 == ",
4365 peer->json_key);
4366 op_put_v4_networks(&match, router_port, false);
4367
4368 ds_clear(&actions);
4369 ds_put_format(&actions, "eth.dst = %s; next;",
4370 router_port->lrp_networks.ea_s);
4371 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
4372 100, ds_cstr(&match), ds_cstr(&actions));
4373 }
4374
4375 if (router_port->lrp_networks.n_ipv6_addrs) {
4376 ds_clear(&match);
4377 ds_put_format(&match, "outport == %s && xxreg0 == ",
4378 peer->json_key);
4379 op_put_v6_networks(&match, router_port);
4380
4381 ds_clear(&actions);
4382 ds_put_format(&actions, "eth.dst = %s; next;",
4383 router_port->lrp_networks.ea_s);
4384 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
4385 100, ds_cstr(&match), ds_cstr(&actions));
4386 }
4387 }
4388 }
4389 }
4390
4391 HMAP_FOR_EACH (od, key_node, datapaths) {
4392 if (!od->nbr) {
4393 continue;
4394 }
4395
4396 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
4397 "get_arp(outport, reg0); next;");
4398
4399 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
4400 "get_nd(outport, xxreg0); next;");
4401 }
4402
4403 /* Local router ingress table 7: ARP request.
4404 *
4405 * In the common case where the Ethernet destination has been resolved,
4406 * this table outputs the packet (priority 0). Otherwise, it composes
4407 * and sends an ARP request (priority 100). */
4408 HMAP_FOR_EACH (od, key_node, datapaths) {
4409 if (!od->nbr) {
4410 continue;
4411 }
4412
4413 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
4414 "eth.dst == 00:00:00:00:00:00",
4415 "arp { "
4416 "eth.dst = ff:ff:ff:ff:ff:ff; "
4417 "arp.spa = reg1; "
4418 "arp.tpa = reg0; "
4419 "arp.op = 1; " /* ARP request */
4420 "output; "
4421 "};");
4422 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
4423 }
4424
4425 /* Logical router egress table 1: Delivery (priority 100).
4426 *
4427 * Priority 100 rules deliver packets to enabled logical ports. */
4428 HMAP_FOR_EACH (op, key_node, ports) {
4429 if (!op->nbrp) {
4430 continue;
4431 }
4432
4433 if (!lrport_is_enabled(op->nbrp)) {
4434 /* Drop packets to disabled logical ports (since logical flow
4435 * tables are default-drop). */
4436 continue;
4437 }
4438
4439 ds_clear(&match);
4440 ds_put_format(&match, "outport == %s", op->json_key);
4441 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
4442 ds_cstr(&match), "output;");
4443 }
4444
4445 ds_destroy(&match);
4446 ds_destroy(&actions);
4447 }
4448
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    /* Compute the full desired set of logical flows and multicast groups
     * in memory before touching the database. */
    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database.
     *
     * First pass: reconcile against the rows already in the southbound DB.
     * A row whose desired counterpart exists in 'lflows' is kept, and the
     * counterpart is removed from 'lflows' so it is not re-inserted below;
     * a row with no counterpart is deleted. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            /* The flow's datapath no longer exists. */
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }

    /* Second pass: whatever remains in 'lflows' is new; insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Trim the source locator lflow->where, which looks something like
         * "ovn/northd/ovn-northd.c:1234", down to just the part following the
         * last slash, e.g. "ovn-northd.c:1234". */
        const char *slash = strrchr(lflow->where, '/');
#if _WIN32
        const char *backslash = strrchr(lflow->where, '\\');
        if (!slash || backslash > slash) {
            slash = backslash;
        }
#endif
        const char *where = slash ? slash + 1 : lflow->where;

        const struct smap ids = SMAP_CONST2(
            &ids,
            "stage-name", ovn_stage_to_str(lflow->stage),
            "source", where);
        sbrec_logical_flow_set_external_ids(sbflow, &ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database.  Same two-pass
     * reconciliation as above, except that a matched row is also updated in
     * place via ovn_multicast_update_sbrec(). */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
4550
4551 /* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
4552 * We always update OVN_Southbound to match the current data in
4553 * OVN_Northbound, so that the address sets used in Logical_Flows in
4554 * OVN_Southbound is checked against the proper set.*/
4555 static void
4556 sync_address_sets(struct northd_context *ctx)
4557 {
4558 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
4559
4560 const struct sbrec_address_set *sb_address_set;
4561 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
4562 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
4563 }
4564
4565 const struct nbrec_address_set *nb_address_set;
4566 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
4567 sb_address_set = shash_find_and_delete(&sb_address_sets,
4568 nb_address_set->name);
4569 if (!sb_address_set) {
4570 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
4571 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
4572 }
4573
4574 sbrec_address_set_set_addresses(sb_address_set,
4575 /* "char **" is not compatible with "const char **" */
4576 (const char **) nb_address_set->addresses,
4577 nb_address_set->n_addresses);
4578 }
4579
4580 struct shash_node *node, *next;
4581 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
4582 sbrec_address_set_delete(node->data);
4583 shash_delete(&sb_address_sets, node);
4584 }
4585 shash_destroy(&sb_address_sets);
4586 }
4587 \f
/* Recomputes all southbound state derived from the northbound database:
 * datapaths, ports, IPAM assignments, logical flows, and address sets.
 * Also propagates nb_cfg toward the southbound DB.  No-op unless both
 * transactions are open. */
static void
ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }
    struct hmap datapaths, ports;
    /* Order matters: ports are built against datapaths, IPAM against both,
     * and flows against the fully built topology. */
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, &datapaths, &ports);
    build_ipam(&datapaths, &ports);
    build_lflows(ctx, &datapaths, &ports);

    sync_address_sets(ctx);

    /* Free the in-memory topology built above. */
    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        /* Create the singleton NB_Global row if it does not exist yet. */
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        /* Likewise for the singleton SB_Global row. */
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    /* Release the per-run MAC address allocations made by IPAM. */
    cleanup_macam(&macam);
}
4630
4631 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
4632 * this column is not empty, it means we need to set the corresponding logical
4633 * port as 'up' in the northbound DB. */
4634 static void
4635 update_logical_port_status(struct northd_context *ctx)
4636 {
4637 struct hmap lports_hmap;
4638 const struct sbrec_port_binding *sb;
4639 const struct nbrec_logical_switch_port *nbsp;
4640
4641 struct lport_hash_node {
4642 struct hmap_node node;
4643 const struct nbrec_logical_switch_port *nbsp;
4644 } *hash_node;
4645
4646 hmap_init(&lports_hmap);
4647
4648 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
4649 hash_node = xzalloc(sizeof *hash_node);
4650 hash_node->nbsp = nbsp;
4651 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
4652 }
4653
4654 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
4655 nbsp = NULL;
4656 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
4657 hash_string(sb->logical_port, 0),
4658 &lports_hmap) {
4659 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
4660 nbsp = hash_node->nbsp;
4661 break;
4662 }
4663 }
4664
4665 if (!nbsp) {
4666 /* The logical port doesn't exist for this port binding. This can
4667 * happen under normal circumstances when ovn-northd hasn't gotten
4668 * around to pruning the Port_Binding yet. */
4669 continue;
4670 }
4671
4672 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
4673 bool up = true;
4674 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
4675 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
4676 bool up = false;
4677 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
4678 }
4679 }
4680
4681 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
4682 free(hash_node);
4683 }
4684 hmap_destroy(&lports_hmap);
4685 }
4686
/* DHCPv4 options that ovn-northd supports; these are synced into the
 * southbound DHCP_Options table by
 * check_and_add_supported_dhcp_opts_to_sb_db().  Each macro expands to a
 * struct dhcp_opts_map initializer supplying name/code/type -- presumably
 * defined in ovn/lib/ovn-dhcp.h; verify there for exact values. */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
4713
/* DHCPv6 options that ovn-northd supports; synced into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db().
 * Each macro expands to a struct dhcp_opts_map initializer -- presumably
 * defined in ovn/lib/ovn-dhcp.h; verify there for exact values. */
static struct dhcp_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
4720
4721 static void
4722 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
4723 {
4724 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
4725 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
4726 sizeof(supported_dhcp_opts[0])); i++) {
4727 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
4728 dhcp_opt_hash(supported_dhcp_opts[i].name));
4729 }
4730
4731 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
4732 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4733 struct dhcp_opts_map *dhcp_opt =
4734 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
4735 if (dhcp_opt) {
4736 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
4737 } else {
4738 sbrec_dhcp_options_delete(opt_row);
4739 }
4740 }
4741
4742 struct dhcp_opts_map *opt;
4743 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
4744 struct sbrec_dhcp_options *sbrec_dhcp_option =
4745 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
4746 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
4747 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
4748 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
4749 }
4750
4751 hmap_destroy(&dhcp_opts_to_add);
4752 }
4753
4754 static void
4755 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
4756 {
4757 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
4758 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
4759 sizeof(supported_dhcpv6_opts[0])); i++) {
4760 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
4761 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
4762 }
4763
4764 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
4765 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4766 struct dhcp_opts_map *dhcp_opt =
4767 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
4768 if (dhcp_opt) {
4769 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
4770 } else {
4771 sbrec_dhcpv6_options_delete(opt_row);
4772 }
4773 }
4774
4775 struct dhcp_opts_map *opt;
4776 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
4777 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
4778 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
4779 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
4780 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
4781 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
4782 }
4783
4784 hmap_destroy(&dhcpv6_opts_to_add);
4785 }
4786
4787 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
4788 static void
4789 update_northbound_cfg(struct northd_context *ctx,
4790 struct ovsdb_idl_loop *sb_loop)
4791 {
4792 /* Update northbound sb_cfg if appropriate. */
4793 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
4794 int64_t sb_cfg = sb_loop->cur_cfg;
4795 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
4796 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
4797 }
4798
4799 /* Update northbound hv_cfg if appropriate. */
4800 if (nbg) {
4801 /* Find minimum nb_cfg among all chassis. */
4802 const struct sbrec_chassis *chassis;
4803 int64_t hv_cfg = nbg->nb_cfg;
4804 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
4805 if (chassis->nb_cfg < hv_cfg) {
4806 hv_cfg = chassis->nb_cfg;
4807 }
4808 }
4809
4810 /* Update hv_cfg. */
4811 if (nbg->hv_cfg != hv_cfg) {
4812 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
4813 }
4814 }
4815 }
4816
4817 /* Handle a fairly small set of changes in the southbound database. */
4818 static void
4819 ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
4820 {
4821 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
4822 return;
4823 }
4824
4825 update_logical_port_status(ctx);
4826 update_northbound_cfg(ctx, sb_loop);
4827 }
4828 \f
4829 static void
4830 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
4831 {
4832 enum {
4833 DAEMON_OPTION_ENUMS,
4834 VLOG_OPTION_ENUMS,
4835 SSL_OPTION_ENUMS,
4836 };
4837 static const struct option long_options[] = {
4838 {"ovnsb-db", required_argument, NULL, 'd'},
4839 {"ovnnb-db", required_argument, NULL, 'D'},
4840 {"help", no_argument, NULL, 'h'},
4841 {"options", no_argument, NULL, 'o'},
4842 {"version", no_argument, NULL, 'V'},
4843 DAEMON_LONG_OPTIONS,
4844 VLOG_LONG_OPTIONS,
4845 STREAM_SSL_LONG_OPTIONS,
4846 {NULL, 0, NULL, 0},
4847 };
4848 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
4849
4850 for (;;) {
4851 int c;
4852
4853 c = getopt_long(argc, argv, short_options, long_options, NULL);
4854 if (c == -1) {
4855 break;
4856 }
4857
4858 switch (c) {
4859 DAEMON_OPTION_HANDLERS;
4860 VLOG_OPTION_HANDLERS;
4861 STREAM_SSL_OPTION_HANDLERS;
4862
4863 case 'd':
4864 ovnsb_db = optarg;
4865 break;
4866
4867 case 'D':
4868 ovnnb_db = optarg;
4869 break;
4870
4871 case 'h':
4872 usage();
4873 exit(EXIT_SUCCESS);
4874
4875 case 'o':
4876 ovs_cmdl_print_options(long_options);
4877 exit(EXIT_SUCCESS);
4878
4879 case 'V':
4880 ovs_print_version(0, 0);
4881 exit(EXIT_SUCCESS);
4882
4883 default:
4884 break;
4885 }
4886 }
4887
4888 if (!ovnsb_db) {
4889 ovnsb_db = default_sb_db();
4890 }
4891
4892 if (!ovnnb_db) {
4893 ovnnb_db = default_nb_db();
4894 }
4895
4896 free(short_options);
4897 }
4898
/* Adds 'column' to the set monitored on 'idl' but suppresses change
 * alerts for it.  Used in main() for columns that this daemon itself
 * writes, so echoes of its own updates do not wake it up. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
4906
/* ovn-northd entry point: parses options, daemonizes, configures IDL
 * connections to the northbound and southbound databases, then runs the
 * translation loop until asked to exit (unixctl "exit" or service stop). */
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    /* Control socket for ovs-appctl commands. */
    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    /* 'exiting' is still uninitialized here, but the handler only writes
     * it, and it is first read in the main loop below after being set to
     * false. */
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg and hv_cfg are written by update_northbound_cfg(), so skip
     * alerts for them to avoid waking up on our own updates. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    /* No omit_alert here: changes to the chassis column must wake us. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    /* nb_cfg is read by update_northbound_cfg() to compute hv_cfg; chassis
     * updates to it should wake us, so no omit_alert. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* ovsdb_idl_loop_run() also opens the per-iteration transactions
         * (may be NULL if a previous commit is still in flight). */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        /* Keep the SB DHCP(v4/v6) option tables in sync with the options
         * northd supports; both writers need an open SB transaction. */
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Don't sleep in poll_block(); finish this iteration and exit. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
5039
5040 static void
5041 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
5042 const char *argv[] OVS_UNUSED, void *exiting_)
5043 {
5044 bool *exiting = exiting_;
5045 *exiting = true;
5046
5047 unixctl_command_reply(conn, NULL);
5048 }