]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
OVN: add mac address only support to IPAM/MACAM
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
38 #include "packets.h"
39 #include "openvswitch/poll-loop.h"
40 #include "smap.h"
41 #include "sset.h"
42 #include "svec.h"
43 #include "stream.h"
44 #include "stream-ssl.h"
45 #include "unixctl.h"
46 #include "util.h"
47 #include "uuid.h"
48 #include "openvswitch/vlog.h"
49
/* Declares the vlog module used by the VLOG_* calls in this file
 * (presumably named "ovn_northd" -- confirm against openvswitch/vlog.h). */
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* Forward declaration of a unixctl command callback; its definition is not
 * part of this section of the file. */
static unixctl_cb_func ovn_northd_exit;
53
/* Database handles passed around by the functions in this file.
 *
 * The '*_idl' members are used to iterate over database rows and the
 * '*_txn' members are used to insert rows and attach comments. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database IDL. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database IDL. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Southbound transaction. */
};
60
/* Database locations and control socket path.  They are assigned outside
 * this section of the file. */
static const char *ovnnb_db;
static const char *ovnsb_db;
static const char *unixctl_path;

/* MAC_ADDR_PREFIX is the fallback value used for the upper bits of
 * dynamically managed MAC addresses when 'mac_prefix' is all-zeros.
 * MAC_ADDR_SPACE masks the low 24 bits, which hold the per-address
 * suffix. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);
/* Prefix that takes precedence over MAC_ADDR_PREFIX when it is nonzero. */
static struct eth_addr mac_prefix;

#define MAX_OVN_TAGS 4096
74 \f
75 /* Pipeline stages. */
76
/* The two pipelines in an OVN logical flow table.
 *
 * The numeric values matter: OVN_STAGE_BUILD() stores the pipeline in a
 * single bit of an "enum ovn_stage", so P_IN must be 0 and P_OUT must be
 * 1. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};
82
/* The two purposes for which ovn-northd uses OVN logical datapaths.
 *
 * The numeric value is shifted into an "enum ovn_stage" by
 * OVN_STAGE_BUILD(). */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
88
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Bit layout of the result: the table number occupies bits 0-7, the
 * pipeline occupies bit 8, and the datapath type starts at bit 9.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
95
/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * PIPELINE_STAGES is an X-macro list: each user defines PIPELINE_STAGE(),
 * expands PIPELINE_STAGES, and then undefines PIPELINE_STAGE again.  Here it
 * generates one enumerator per stage whose value is produced by
 * OVN_STAGE_BUILD(). */
enum ovn_stage {
#define PIPELINE_STAGES \
    /* Logical switch ingress stages. */ \
    PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
    PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
    PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
    PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
    PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
    PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
    PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
    PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
    PIPELINE_STAGE(SWITCH, IN, QOS_METER, 8, "ls_in_qos_meter") \
    PIPELINE_STAGE(SWITCH, IN, LB, 9, "ls_in_lb") \
    PIPELINE_STAGE(SWITCH, IN, STATEFUL, 10, "ls_in_stateful") \
    PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 11, "ls_in_arp_rsp") \
    PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 12, "ls_in_dhcp_options") \
    PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 13, "ls_in_dhcp_response") \
    PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 14, "ls_in_dns_lookup") \
    PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 15, "ls_in_dns_response") \
    PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 16, "ls_in_l2_lkup") \
    \
    /* Logical switch egress stages. */ \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
    PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
    PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
    PIPELINE_STAGE(SWITCH, OUT, QOS_METER, 6, "ls_out_qos_meter") \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 7, "ls_out_stateful") \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 8, "ls_out_port_sec_ip") \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 9, "ls_out_port_sec_l2") \
    \
    /* Logical router ingress stages. */ \
    PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
    PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
    PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
    PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
    PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
    PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 5, "lr_in_nd_ra_options") \
    PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
    PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \
    PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 8, "lr_in_arp_resolve") \
    PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 9, "lr_in_gw_redirect") \
    PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 10, "lr_in_arp_request") \
    \
    /* Logical router egress stages. */ \
    PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
    PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \
    PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
    S_##DP_TYPE##_##PIPELINE##_##STAGE \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
161
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
167
/* Register definitions specific to switches.  Each macro expands to the
 * textual name of a single register bit. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
#define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT "reg9[0]"
/* Indicates that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets whose source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK "reg9[1]"
183
184 /* Returns an "enum ovn_stage" built from the arguments. */
185 static enum ovn_stage
186 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
187 uint8_t table)
188 {
189 return OVN_STAGE_BUILD(dp_type, pipeline, table);
190 }
191
192 /* Returns the pipeline to which 'stage' belongs. */
193 static enum ovn_pipeline
194 ovn_stage_get_pipeline(enum ovn_stage stage)
195 {
196 return (stage >> 8) & 1;
197 }
198
199 /* Returns the pipeline name to which 'stage' belongs. */
200 static const char *
201 ovn_stage_get_pipeline_name(enum ovn_stage stage)
202 {
203 return ovn_stage_get_pipeline(stage) == P_IN ? "ingress" : "egress";
204 }
205
206 /* Returns the table to which 'stage' belongs. */
207 static uint8_t
208 ovn_stage_get_table(enum ovn_stage stage)
209 {
210 return stage & 0xff;
211 }
212
213 /* Returns a string name for 'stage'. */
214 static const char *
215 ovn_stage_to_str(enum ovn_stage stage)
216 {
217 switch (stage) {
218 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
219 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
220 PIPELINE_STAGES
221 #undef PIPELINE_STAGE
222 default: return "<unknown>";
223 }
224 }
225
226 /* Returns the type of the datapath to which a flow with the given 'stage' may
227 * be added. */
228 static enum ovn_datapath_type
229 ovn_stage_to_datapath_type(enum ovn_stage stage)
230 {
231 switch (stage) {
232 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
233 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
234 PIPELINE_STAGES
235 #undef PIPELINE_STAGE
236 default: OVS_NOT_REACHED();
237 }
238 }
239 \f
/* Prints the command-line help text to stdout, filling in the program name
 * and the default northbound/southbound database locations, then lets the
 * daemon, vlog and stream modules print their own option help. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
--ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
(default: %s)\n\
--ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
(default: %s)\n\
--unixctl=SOCKET override default control socket name\n\
-h, --help display this help message\n\
-o, --options list available options\n\
-V, --version display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
261 \f
/* One element of a set of tunnel IDs that are in use.  The set is a
 * "struct hmap" indexed by hash_int(tnlid, 0). */
struct tnlid_node {
    struct hmap_node hmap_node; /* In the set of in-use tunnel IDs. */
    uint32_t tnlid;             /* The tunnel ID itself. */
};
266
267 static void
268 destroy_tnlids(struct hmap *tnlids)
269 {
270 struct tnlid_node *node;
271 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
272 free(node);
273 }
274 hmap_destroy(tnlids);
275 }
276
277 static void
278 add_tnlid(struct hmap *set, uint32_t tnlid)
279 {
280 struct tnlid_node *node = xmalloc(sizeof *node);
281 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
282 node->tnlid = tnlid;
283 }
284
285 static bool
286 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
287 {
288 const struct tnlid_node *node;
289 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
290 if (node->tnlid == tnlid) {
291 return true;
292 }
293 }
294 return false;
295 }
296
/* Returns the tunnel ID that follows 'tnlid' in the cyclic sequence
 * 1, 2, ..., 'max', 1, ...
 *
 * Any 'tnlid' that is greater than or equal to 'max' wraps around to 1.
 * The comparison is made on 'tnlid' itself rather than on 'tnlid + 1' so
 * that 'tnlid' == UINT32_MAX cannot overflow to 0, which is reserved to
 * mean "no ID". */
static uint32_t
next_tnlid(uint32_t tnlid, uint32_t max)
{
    return tnlid < max ? tnlid + 1 : 1;
}
302
303 static uint32_t
304 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
305 uint32_t *hint)
306 {
307 for (uint32_t tnlid = next_tnlid(*hint, max); tnlid != *hint;
308 tnlid = next_tnlid(tnlid, max)) {
309 if (!tnlid_in_use(set, tnlid)) {
310 add_tnlid(set, tnlid);
311 *hint = tnlid;
312 return tnlid;
313 }
314 }
315
316 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
317 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
318 return 0;
319 }
320 \f
/* One element of a set of (chassis, queue ID) pairs that are in use.  The
 * set is a "struct hmap" indexed by hash_chassis_queue(). */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* In the set of in-use queue IDs. */
    uint32_t queue_id;          /* Queue ID in use on the chassis. */
    struct uuid chassis_uuid;   /* UUID of the chassis row. */
};
326
/* Returns the hash used to index the pair ('chassis_uuid', 'queue_id') in a
 * set of "struct ovn_chassis_qdisc_queues". */
static uint32_t
hash_chassis_queue(const struct uuid *chassis_uuid, uint32_t queue_id)
{
    uint32_t chassis_hash = uuid_hash(chassis_uuid);

    return hash_2words(chassis_hash, queue_id);
}
332
333 static void
334 destroy_chassis_queues(struct hmap *set)
335 {
336 struct ovn_chassis_qdisc_queues *node;
337 HMAP_FOR_EACH_POP (node, key_node, set) {
338 free(node);
339 }
340 hmap_destroy(set);
341 }
342
343 static void
344 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
345 uint32_t queue_id)
346 {
347 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
348 node->queue_id = queue_id;
349 node->chassis_uuid = *chassis_uuid;
350 hmap_insert(set, &node->key_node,
351 hash_chassis_queue(chassis_uuid, queue_id));
352 }
353
354 static bool
355 chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
356 uint32_t queue_id)
357 {
358 const struct ovn_chassis_qdisc_queues *node;
359 HMAP_FOR_EACH_WITH_HASH (node, key_node,
360 hash_chassis_queue(chassis_uuid, queue_id), set) {
361 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
362 && node->queue_id == queue_id) {
363 return true;
364 }
365 }
366 return false;
367 }
368
369 static uint32_t
370 allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
371 {
372 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
373 queue_id <= QDISC_MAX_QUEUE_ID;
374 queue_id++) {
375 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
376 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
377 return queue_id;
378 }
379 }
380
381 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
382 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
383 return 0;
384 }
385
386 static void
387 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
388 uint32_t queue_id)
389 {
390 const struct uuid *chassis_uuid = &chassis->header_.uuid;
391 struct ovn_chassis_qdisc_queues *node;
392 HMAP_FOR_EACH_WITH_HASH (node, key_node,
393 hash_chassis_queue(chassis_uuid, queue_id), set) {
394 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
395 && node->queue_id == queue_id) {
396 hmap_remove(set, &node->key_node);
397 free(node);
398 break;
399 }
400 }
401 }
402
/* Returns true if 'opts' contains a "qos_max_rate" or a "qos_burst" key. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    if (smap_get(opts, "qos_max_rate")) {
        return true;
    }
    return smap_get(opts, "qos_burst") != NULL;
}
409 \f
410
/* Per-logical-switch address management state, filled in from the switch's
 * other_config column by init_ipam_info_for_datapath(). */
struct ipam_info {
    uint32_t start_ipv4;            /* First managed IPv4 address, in host
                                     * byte order: the subnet address + 1. */
    size_t total_ipv4s;             /* Number of bits in 'allocated_ipv4s'. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
    bool ipv6_prefix_set;           /* True if 'ipv6_prefix' parsed OK. */
    struct in6_addr ipv6_prefix;    /* From other_config:ipv6_prefix. */
    bool mac_only;                  /* From other_config:mac_only; only read
                                     * when neither a subnet nor an IPv6
                                     * prefix is configured. */
};
419
/* A logical datapath: a northbound logical switch or logical router joined
 * with its southbound Datapath_Binding row.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Port tunnel keys in use on this datapath
                                 * ("struct tnlid_node"s). */
    uint32_t port_key_hint;     /* Search hint for the next port key. */

    bool has_unknown;

    /* IPAM data. */
    struct ipam_info ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
    struct ovn_port *localnet_port;

    /* Port groups related to the datapath, used only when nbs is NOT NULL. */
    struct hmap nb_pgs;
};
457
/* One entry of the file-level 'macam' table, which is indexed by
 * hash_uint64() of the address's 64-bit integer form. */
struct macam_node {
    struct hmap_node hmap_node; /* In 'macam'. */
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};
462
463 static void
464 cleanup_macam(struct hmap *macam_)
465 {
466 struct macam_node *node;
467 HMAP_FOR_EACH_POP (node, hmap_node, macam_) {
468 free(node);
469 }
470 }
471
472 static struct ovn_datapath *
473 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
474 const struct nbrec_logical_switch *nbs,
475 const struct nbrec_logical_router *nbr,
476 const struct sbrec_datapath_binding *sb)
477 {
478 struct ovn_datapath *od = xzalloc(sizeof *od);
479 od->key = *key;
480 od->sb = sb;
481 od->nbs = nbs;
482 od->nbr = nbr;
483 hmap_init(&od->port_tnlids);
484 hmap_init(&od->nb_pgs);
485 od->port_key_hint = 0;
486 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
487 return od;
488 }
489
490 static void ovn_ls_port_group_destroy(struct hmap *nb_pgs);
491
492 static void
493 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
494 {
495 if (od) {
496 /* Don't remove od->list. It is used within build_datapaths() as a
497 * private list and once we've exited that function it is not safe to
498 * use it. */
499 hmap_remove(datapaths, &od->key_node);
500 destroy_tnlids(&od->port_tnlids);
501 bitmap_free(od->ipam_info.allocated_ipv4s);
502 free(od->router_ports);
503 ovn_ls_port_group_destroy(&od->nb_pgs);
504 free(od);
505 }
506 }
507
508 /* Returns 'od''s datapath type. */
509 static enum ovn_datapath_type
510 ovn_datapath_get_type(const struct ovn_datapath *od)
511 {
512 return od->nbs ? DP_SWITCH : DP_ROUTER;
513 }
514
515 static struct ovn_datapath *
516 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
517 {
518 struct ovn_datapath *od;
519
520 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
521 if (uuid_equals(uuid, &od->key)) {
522 return od;
523 }
524 }
525 return NULL;
526 }
527
528 static struct ovn_datapath *
529 ovn_datapath_from_sbrec(struct hmap *datapaths,
530 const struct sbrec_datapath_binding *sb)
531 {
532 struct uuid key;
533
534 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
535 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
536 return NULL;
537 }
538 return ovn_datapath_find(datapaths, &key);
539 }
540
541 static bool
542 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
543 {
544 return !lrouter->enabled || *lrouter->enabled;
545 }
546
547 static void
548 init_ipam_info_for_datapath(struct ovn_datapath *od)
549 {
550 if (!od->nbs) {
551 return;
552 }
553
554 const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
555 const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");
556
557 if (ipv6_prefix) {
558 od->ipam_info.ipv6_prefix_set = ipv6_parse(
559 ipv6_prefix, &od->ipam_info.ipv6_prefix);
560 }
561
562 if (!subnet_str) {
563 if (!ipv6_prefix) {
564 od->ipam_info.mac_only = smap_get_bool(&od->nbs->other_config,
565 "mac_only", false);
566 }
567 return;
568 }
569
570 ovs_be32 subnet, mask;
571 char *error = ip_parse_masked(subnet_str, &subnet, &mask);
572 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
573 static struct vlog_rate_limit rl
574 = VLOG_RATE_LIMIT_INIT(5, 1);
575 VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
576 free(error);
577 return;
578 }
579
580 od->ipam_info.start_ipv4 = ntohl(subnet) + 1;
581 od->ipam_info.total_ipv4s = ~ntohl(mask);
582 od->ipam_info.allocated_ipv4s =
583 bitmap_allocate(od->ipam_info.total_ipv4s);
584
585 /* Mark first IP as taken */
586 bitmap_set1(od->ipam_info.allocated_ipv4s, 0);
587
588 /* Check if there are any reserver IPs (list) to be excluded from IPAM */
589 const char *exclude_ip_list = smap_get(&od->nbs->other_config,
590 "exclude_ips");
591 if (!exclude_ip_list) {
592 return;
593 }
594
595 struct lexer lexer;
596 lexer_init(&lexer, exclude_ip_list);
597 /* exclude_ip_list could be in the format -
598 * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
599 */
600 lexer_get(&lexer);
601 while (lexer.token.type != LEX_T_END) {
602 if (lexer.token.type != LEX_T_INTEGER) {
603 lexer_syntax_error(&lexer, "expecting address");
604 break;
605 }
606 uint32_t start = ntohl(lexer.token.value.ipv4);
607 lexer_get(&lexer);
608
609 uint32_t end = start + 1;
610 if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
611 if (lexer.token.type != LEX_T_INTEGER) {
612 lexer_syntax_error(&lexer, "expecting address range");
613 break;
614 }
615 end = ntohl(lexer.token.value.ipv4) + 1;
616 lexer_get(&lexer);
617 }
618
619 /* Clamp start...end to fit the subnet. */
620 start = MAX(od->ipam_info.start_ipv4, start);
621 end = MIN(od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s, end);
622 if (end > start) {
623 bitmap_set_multiple(od->ipam_info.allocated_ipv4s,
624 start - od->ipam_info.start_ipv4,
625 end - start, 1);
626 } else {
627 lexer_error(&lexer, "excluded addresses not in subnet");
628 }
629 }
630 if (lexer.error) {
631 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
632 VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
633 UUID_ARGS(&od->key), lexer.error);
634 }
635 lexer_destroy(&lexer);
636 }
637
638 static void
639 ovn_datapath_update_external_ids(struct ovn_datapath *od)
640 {
641 /* Get the logical-switch or logical-router UUID to set in
642 * external-ids. */
643 char uuid_s[UUID_LEN + 1];
644 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
645 const char *key = od->nbs ? "logical-switch" : "logical-router";
646
647 /* Get names to set in external-ids. */
648 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
649 const char *name2 = (od->nbs
650 ? smap_get(&od->nbs->external_ids,
651 "neutron:network_name")
652 : smap_get(&od->nbr->external_ids,
653 "neutron:router_name"));
654
655 /* Set external-ids. */
656 struct smap ids = SMAP_INITIALIZER(&ids);
657 smap_add(&ids, key, uuid_s);
658 smap_add(&ids, "name", name);
659 if (name2 && name2[0]) {
660 smap_add(&ids, "name2", name2);
661 }
662 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
663 smap_destroy(&ids);
664 }
665
666 static void
667 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
668 struct ovs_list *sb_only, struct ovs_list *nb_only,
669 struct ovs_list *both)
670 {
671 hmap_init(datapaths);
672 ovs_list_init(sb_only);
673 ovs_list_init(nb_only);
674 ovs_list_init(both);
675
676 const struct sbrec_datapath_binding *sb, *sb_next;
677 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
678 struct uuid key;
679 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
680 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
681 ovsdb_idl_txn_add_comment(
682 ctx->ovnsb_txn,
683 "deleting Datapath_Binding "UUID_FMT" that lacks "
684 "external-ids:logical-switch and "
685 "external-ids:logical-router",
686 UUID_ARGS(&sb->header_.uuid));
687 sbrec_datapath_binding_delete(sb);
688 continue;
689 }
690
691 if (ovn_datapath_find(datapaths, &key)) {
692 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
693 VLOG_INFO_RL(
694 &rl, "deleting Datapath_Binding "UUID_FMT" with "
695 "duplicate external-ids:logical-switch/router "UUID_FMT,
696 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
697 sbrec_datapath_binding_delete(sb);
698 continue;
699 }
700
701 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
702 NULL, NULL, sb);
703 ovs_list_push_back(sb_only, &od->list);
704 }
705
706 const struct nbrec_logical_switch *nbs;
707 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
708 struct ovn_datapath *od = ovn_datapath_find(datapaths,
709 &nbs->header_.uuid);
710 if (od) {
711 od->nbs = nbs;
712 ovs_list_remove(&od->list);
713 ovs_list_push_back(both, &od->list);
714 ovn_datapath_update_external_ids(od);
715 } else {
716 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
717 nbs, NULL, NULL);
718 ovs_list_push_back(nb_only, &od->list);
719 }
720
721 init_ipam_info_for_datapath(od);
722 }
723
724 const struct nbrec_logical_router *nbr;
725 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
726 if (!lrouter_is_enabled(nbr)) {
727 continue;
728 }
729
730 struct ovn_datapath *od = ovn_datapath_find(datapaths,
731 &nbr->header_.uuid);
732 if (od) {
733 if (!od->nbs) {
734 od->nbr = nbr;
735 ovs_list_remove(&od->list);
736 ovs_list_push_back(both, &od->list);
737 ovn_datapath_update_external_ids(od);
738 } else {
739 /* Can't happen! */
740 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
741 VLOG_WARN_RL(&rl,
742 "duplicate UUID "UUID_FMT" in OVN_Northbound",
743 UUID_ARGS(&nbr->header_.uuid));
744 continue;
745 }
746 } else {
747 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
748 NULL, nbr, NULL);
749 ovs_list_push_back(nb_only, &od->list);
750 }
751 }
752 }
753
/* Allocates a datapath tunnel key in the range 1...2**24-1 that is not yet
 * in 'dp_tnlids', records it there and returns it.  Returns 0 if the range
 * is exhausted.
 *
 * The search hint is kept in a function-local static, so it persists
 * across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_datapath_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_datapath_key, &hint);
}
760
761 /* Updates the southbound Datapath_Binding table so that it contains the
762 * logical switches and routers specified by the northbound database.
763 *
764 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
765 * switch and router. */
766 static void
767 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
768 {
769 struct ovs_list sb_only, nb_only, both;
770
771 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
772
773 if (!ovs_list_is_empty(&nb_only)) {
774 /* First index the in-use datapath tunnel IDs. */
775 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
776 struct ovn_datapath *od;
777 LIST_FOR_EACH (od, list, &both) {
778 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
779 }
780
781 /* Add southbound record for each unmatched northbound record. */
782 LIST_FOR_EACH (od, list, &nb_only) {
783 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
784 if (!tunnel_key) {
785 break;
786 }
787
788 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
789 ovn_datapath_update_external_ids(od);
790 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
791 }
792 destroy_tnlids(&dp_tnlids);
793 }
794
795 /* Delete southbound records without northbound matches. */
796 struct ovn_datapath *od, *next;
797 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
798 ovs_list_remove(&od->list);
799 sbrec_datapath_binding_delete(od->sb);
800 ovn_datapath_destroy(datapaths, od);
801 }
802 }
803 \f
/* A logical port: a northbound logical switch port or logical router port
 * joined with its southbound Port_Binding row. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived;               /* Indicates whether this is an additional port
                                 * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S as its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;

    struct ovs_list list;       /* In list of similar records. */
};
840
841 static struct ovn_port *
842 ovn_port_create(struct hmap *ports, const char *key,
843 const struct nbrec_logical_switch_port *nbsp,
844 const struct nbrec_logical_router_port *nbrp,
845 const struct sbrec_port_binding *sb)
846 {
847 struct ovn_port *op = xzalloc(sizeof *op);
848
849 struct ds json_key = DS_EMPTY_INITIALIZER;
850 json_string_escape(key, &json_key);
851 op->json_key = ds_steal_cstr(&json_key);
852
853 op->key = xstrdup(key);
854 op->sb = sb;
855 op->nbsp = nbsp;
856 op->nbrp = nbrp;
857 op->derived = false;
858 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
859 return op;
860 }
861
862 static void
863 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
864 {
865 if (port) {
866 /* Don't remove port->list. It is used within build_ports() as a
867 * private list and once we've exited that function it is not safe to
868 * use it. */
869 hmap_remove(ports, &port->key_node);
870
871 for (int i = 0; i < port->n_lsp_addrs; i++) {
872 destroy_lport_addresses(&port->lsp_addrs[i]);
873 }
874 free(port->lsp_addrs);
875
876 for (int i = 0; i < port->n_ps_addrs; i++) {
877 destroy_lport_addresses(&port->ps_addrs[i]);
878 }
879 free(port->ps_addrs);
880
881 destroy_lport_addresses(&port->lrp_networks);
882 free(port->json_key);
883 free(port->key);
884 free(port);
885 }
886 }
887
888 static struct ovn_port *
889 ovn_port_find(struct hmap *ports, const char *name)
890 {
891 struct ovn_port *op;
892
893 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
894 if (!strcmp(op->key, name)) {
895 return op;
896 }
897 }
898 return NULL;
899 }
900
901 static uint32_t
902 ovn_port_allocate_key(struct ovn_datapath *od)
903 {
904 return allocate_tnlid(&od->port_tnlids, "port",
905 (1u << 15) - 1, &od->port_key_hint);
906 }
907
/* Returns a newly allocated string made of "cr-" followed by 'port_name'.
 * The string comes from xasprintf(), so the caller presumably owns it and
 * must free() it. */
static char *
chassis_redirect_name(const char *port_name)
{
    char *redirect_name = xasprintf("cr-%s", port_name);

    return redirect_name;
}
913
914 static bool
915 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
916 {
917 struct macam_node *macam_node;
918 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
919 &macam) {
920 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
921 if (warn) {
922 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
923 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
924 ETH_ADDR_ARGS(macam_node->mac_addr));
925 }
926 return true;
927 }
928 }
929 return false;
930 }
931
932 static void
933 ipam_insert_mac(struct eth_addr *ea, bool check)
934 {
935 if (!ea) {
936 return;
937 }
938
939 uint64_t mac64 = eth_addr_to_uint64(*ea);
940 uint64_t prefix;
941
942 if (!eth_addr_is_zero(mac_prefix)) {
943 prefix = eth_addr_to_uint64(mac_prefix);
944 } else {
945 prefix = MAC_ADDR_PREFIX;
946 }
947 /* If the new MAC was not assigned by this address management system or
948 * check is true and the new MAC is a duplicate, do not insert it into the
949 * macam hmap. */
950 if (((mac64 ^ prefix) >> 24)
951 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
952 return;
953 }
954
955 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
956 new_macam_node->mac_addr = *ea;
957 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
958 }
959
960 static void
961 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
962 {
963 if (!od || !od->ipam_info.allocated_ipv4s) {
964 return;
965 }
966
967 if (ip >= od->ipam_info.start_ipv4 &&
968 ip < (od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s)) {
969 if (bitmap_is_set(od->ipam_info.allocated_ipv4s,
970 ip - od->ipam_info.start_ipv4)) {
971 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
972 VLOG_WARN_RL(&rl, "Duplicate IP set on switch %s: "IP_FMT,
973 od->nbs->name, IP_ARGS(htonl(ip)));
974 }
975 bitmap_set1(od->ipam_info.allocated_ipv4s,
976 ip - od->ipam_info.start_ipv4);
977 }
978 }
979
980 static void
981 ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
982 char *address)
983 {
984 if (!od || !op || !address || !strcmp(address, "unknown")
985 || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
986 return;
987 }
988
989 struct lport_addresses laddrs;
990 if (!extract_lsp_addresses(address, &laddrs)) {
991 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
992 VLOG_WARN_RL(&rl, "Extract addresses failed.");
993 return;
994 }
995 ipam_insert_mac(&laddrs.ea, true);
996
997 /* IP is only added to IPAM if the switch's subnet option
998 * is set, whereas MAC is always added to MACAM. */
999 if (!od->ipam_info.allocated_ipv4s) {
1000 destroy_lport_addresses(&laddrs);
1001 return;
1002 }
1003
1004 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
1005 uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
1006 ipam_insert_ip(od, ip);
1007 }
1008
1009 destroy_lport_addresses(&laddrs);
1010 }
1011
1012 static void
1013 ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
1014 {
1015 if (!od || !op) {
1016 return;
1017 }
1018
1019 if (op->nbsp) {
1020 /* Add all the port's addresses to address data structures. */
1021 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
1022 ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
1023 }
1024 } else if (op->nbrp) {
1025 struct lport_addresses lrp_networks;
1026 if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
1027 static struct vlog_rate_limit rl
1028 = VLOG_RATE_LIMIT_INIT(1, 1);
1029 VLOG_WARN_RL(&rl, "Extract addresses failed.");
1030 return;
1031 }
1032 ipam_insert_mac(&lrp_networks.ea, true);
1033
1034 if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
1035 || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
1036 destroy_lport_addresses(&lrp_networks);
1037 return;
1038 }
1039
1040 for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
1041 uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
1042 ipam_insert_ip(op->peer->od, ip);
1043 }
1044
1045 destroy_lport_addresses(&lrp_networks);
1046 }
1047 }
1048
1049 static uint64_t
1050 ipam_get_unused_mac(ovs_be32 ip)
1051 {
1052 uint32_t mac_addr_suffix, i, base_addr = ntohl(ip) & MAC_ADDR_SPACE;
1053 struct eth_addr mac;
1054 uint64_t mac64;
1055
1056 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1057 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1058 mac_addr_suffix = ((base_addr + i) % (MAC_ADDR_SPACE - 1)) + 1;
1059 if (!eth_addr_is_zero(mac_prefix)) {
1060 mac64 = eth_addr_to_uint64(mac_prefix) | mac_addr_suffix;
1061 } else {
1062 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1063 }
1064 eth_addr_from_uint64(mac64, &mac);
1065 if (!ipam_is_duplicate_mac(&mac, mac64, true)) {
1066 break;
1067 }
1068 }
1069
1070 if (i == MAC_ADDR_SPACE) {
1071 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1072 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1073 mac64 = 0;
1074 }
1075
1076 return mac64;
1077 }
1078
1079 static uint32_t
1080 ipam_get_unused_ip(struct ovn_datapath *od)
1081 {
1082 if (!od || !od->ipam_info.allocated_ipv4s) {
1083 return 0;
1084 }
1085
1086 size_t new_ip_index = bitmap_scan(od->ipam_info.allocated_ipv4s, 0, 0,
1087 od->ipam_info.total_ipv4s - 1);
1088 if (new_ip_index == od->ipam_info.total_ipv4s - 1) {
1089 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1090 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
1091 return 0;
1092 }
1093
1094 return od->ipam_info.start_ipv4 + new_ip_index;
1095 }
1096
/* What has to happen to one component (MAC, IPv4 or IPv6) of a port's
 * dynamic addresses. */
enum dynamic_update_type {
    NONE = 0,    /* Keep the address as it is. */
    REMOVE = 1,  /* The address is no longer dynamic; drop it. */
    STATIC = 2,  /* Use the statically configured address (MAC only). */
    DYNAMIC = 3, /* Allocate a fresh dynamic address. */
};
1103
1104 struct dynamic_address_update {
1105 struct ovs_list node; /* In build_ipam()'s list of updates. */
1106
1107 struct ovn_datapath *od;
1108 struct ovn_port *op;
1109
1110 struct lport_addresses current_addresses;
1111 struct eth_addr static_mac;
1112 enum dynamic_update_type mac;
1113 enum dynamic_update_type ipv4;
1114 enum dynamic_update_type ipv6;
1115 };
1116
1117 static enum dynamic_update_type
1118 dynamic_mac_changed(const char *lsp_addresses,
1119 struct dynamic_address_update *update)
1120 {
1121 struct eth_addr ea;
1122
1123 if (ovs_scan(lsp_addresses, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1124 if (eth_addr_equals(ea, update->current_addresses.ea)) {
1125 return NONE;
1126 } else {
1127 /* MAC is still static, but it has changed */
1128 update->static_mac = ea;
1129 return STATIC;
1130 }
1131 }
1132
1133 uint64_t mac64 = eth_addr_to_uint64(update->current_addresses.ea);
1134 uint64_t prefix;
1135
1136 if (!eth_addr_is_zero(mac_prefix)) {
1137 prefix = eth_addr_to_uint64(mac_prefix);
1138 } else {
1139 prefix = MAC_ADDR_PREFIX;
1140 }
1141
1142 if ((mac64 ^ prefix) >> 24) {
1143 return DYNAMIC;
1144 } else {
1145 return NONE;
1146 }
1147 }
1148
1149 static enum dynamic_update_type
1150 dynamic_ip4_changed(struct dynamic_address_update *update)
1151 {
1152 const struct ipam_info *ipam = &update->op->od->ipam_info;
1153 const struct lport_addresses *cur_addresses = &update->current_addresses;
1154 bool dynamic_ip4 = ipam->allocated_ipv4s != NULL;
1155
1156 if (!dynamic_ip4) {
1157 if (update->current_addresses.n_ipv4_addrs) {
1158 return REMOVE;
1159 } else {
1160 return NONE;
1161 }
1162 }
1163
1164 if (!cur_addresses->n_ipv4_addrs) {
1165 /* IPv4 was previously static but now is dynamic */
1166 return DYNAMIC;
1167 }
1168
1169 uint32_t ip4 = ntohl(cur_addresses->ipv4_addrs[0].addr);
1170 if (ip4 < ipam->start_ipv4) {
1171 return DYNAMIC;
1172 }
1173
1174 uint32_t index = ip4 - ipam->start_ipv4;
1175 if (index > ipam->total_ipv4s ||
1176 bitmap_is_set(ipam->allocated_ipv4s, index)) {
1177 /* Previously assigned dynamic IPv4 address can no longer be used.
1178 * It's either outside the subnet, conflicts with an excluded IP,
1179 * or conflicts with a statically-assigned address on the switch
1180 */
1181 return DYNAMIC;
1182 } else {
1183 return NONE;
1184 }
1185 }
1186
1187 static enum dynamic_update_type
1188 dynamic_ip6_changed(struct dynamic_address_update *update)
1189 {
1190 bool dynamic_ip6 = update->op->od->ipam_info.ipv6_prefix_set;
1191
1192 if (!dynamic_ip6) {
1193 if (update->current_addresses.n_ipv6_addrs) {
1194 /* IPv6 was dynamic but now is not */
1195 return REMOVE;
1196 } else {
1197 /* IPv6 has never been dynamic */
1198 return NONE;
1199 }
1200 }
1201
1202 if (update->mac != NONE) {
1203 /* IPv6 address is based on MAC, so if MAC has been updated,
1204 * then we have to update IPv6 address too.
1205 */
1206 return DYNAMIC;
1207 }
1208
1209 if (!update->current_addresses.n_ipv6_addrs) {
1210 /* IPv6 was previously static but now is dynamic */
1211 return DYNAMIC;
1212 }
1213
1214 struct in6_addr masked = ipv6_addr_bitand(
1215 &update->current_addresses.ipv6_addrs[0].addr,
1216 &update->op->od->ipam_info.ipv6_prefix);
1217 if (!IN6_ARE_ADDR_EQUAL(&masked, &update->op->od->ipam_info.ipv6_prefix)) {
1218 return DYNAMIC;
1219 }
1220
1221 return NONE;
1222 }
1223
1224 /* Check previously assigned dynamic addresses for validity. This will
1225 * check if the assigned addresses need to change.
1226 *
1227 * Returns true if any changes to dynamic addresses are required
1228 */
1229 static bool
1230 dynamic_addresses_check_for_updates(const char *lsp_addrs,
1231 struct dynamic_address_update *update)
1232 {
1233 update->mac = dynamic_mac_changed(lsp_addrs, update);
1234 update->ipv4 = dynamic_ip4_changed(update);
1235 update->ipv6 = dynamic_ip6_changed(update);
1236 if (update->mac == NONE &&
1237 update->ipv4 == NONE &&
1238 update->ipv6 == NONE) {
1239 return false;
1240 } else {
1241 return true;
1242 }
1243 }
1244
1245 /* For addresses that do not need to be updated, go ahead and insert them
1246 * into IPAM. This way, their addresses will be claimed and cannot be assigned
1247 * elsewhere later.
1248 */
1249 static void
1250 update_unchanged_dynamic_addresses(struct dynamic_address_update *update)
1251 {
1252 if (update->mac == NONE) {
1253 ipam_insert_mac(&update->current_addresses.ea, false);
1254 }
1255 if (update->ipv4 == NONE && update->current_addresses.n_ipv4_addrs) {
1256 ipam_insert_ip(update->op->od,
1257 ntohl(update->current_addresses.ipv4_addrs[0].addr));
1258 }
1259 }
1260
1261 static void
1262 set_lsp_dynamic_addresses(const char *dynamic_addresses, struct ovn_port *op)
1263 {
1264 extract_lsp_addresses(dynamic_addresses, &op->lsp_addrs[op->n_lsp_addrs]);
1265 op->n_lsp_addrs++;
1266 }
1267
1268 /* Determines which components (MAC, IPv4, and IPv6) of dynamic
1269 * addresses need to be assigned. This is used exclusively for
1270 * ports that do not have dynamic addresses already assigned.
1271 */
1272 static void
1273 set_dynamic_updates(const char *addrspec,
1274 struct dynamic_address_update *update)
1275 {
1276 struct eth_addr mac;
1277 int n = 0;
1278 if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
1279 ETH_ADDR_SCAN_ARGS(mac), &n)
1280 && addrspec[n] == '\0') {
1281 update->mac = STATIC;
1282 update->static_mac = mac;
1283 } else {
1284 update->mac = DYNAMIC;
1285 }
1286 if (update->op->od->ipam_info.allocated_ipv4s) {
1287 update->ipv4 = DYNAMIC;
1288 } else {
1289 update->ipv4 = NONE;
1290 }
1291 if (update->op->od->ipam_info.ipv6_prefix_set) {
1292 update->ipv6 = DYNAMIC;
1293 } else {
1294 update->ipv6 = NONE;
1295 }
1296 }
1297
1298 static void
1299 update_dynamic_addresses(struct dynamic_address_update *update)
1300 {
1301 ovs_be32 ip4 = 0;
1302 switch (update->ipv4) {
1303 case NONE:
1304 if (update->current_addresses.n_ipv4_addrs) {
1305 ip4 = update->current_addresses.ipv4_addrs[0].addr;
1306 }
1307 break;
1308 case REMOVE:
1309 break;
1310 case STATIC:
1311 OVS_NOT_REACHED();
1312 case DYNAMIC:
1313 ip4 = htonl(ipam_get_unused_ip(update->od));
1314 }
1315
1316 struct eth_addr mac;
1317 switch (update->mac) {
1318 case NONE:
1319 mac = update->current_addresses.ea;
1320 break;
1321 case REMOVE:
1322 OVS_NOT_REACHED();
1323 case STATIC:
1324 mac = update->static_mac;
1325 break;
1326 case DYNAMIC:
1327 eth_addr_from_uint64(ipam_get_unused_mac(ip4), &mac);
1328 break;
1329 }
1330
1331 struct in6_addr ip6 = in6addr_any;
1332 switch (update->ipv6) {
1333 case NONE:
1334 if (update->current_addresses.n_ipv6_addrs) {
1335 ip6 = update->current_addresses.ipv6_addrs[0].addr;
1336 }
1337 break;
1338 case REMOVE:
1339 break;
1340 case STATIC:
1341 OVS_NOT_REACHED();
1342 case DYNAMIC:
1343 in6_generate_eui64(mac, &update->od->ipam_info.ipv6_prefix, &ip6);
1344 break;
1345 }
1346
1347 struct ds new_addr = DS_EMPTY_INITIALIZER;
1348 ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1349 ipam_insert_mac(&mac, true);
1350
1351 if (ip4) {
1352 ipam_insert_ip(update->od, ntohl(ip4));
1353 ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(ip4));
1354 }
1355 if (!IN6_ARE_ADDR_EQUAL(&ip6, &in6addr_any)) {
1356 char ip6_s[INET6_ADDRSTRLEN + 1];
1357 ipv6_string_mapped(ip6_s, &ip6);
1358 ds_put_format(&new_addr, " %s", ip6_s);
1359 }
1360 nbrec_logical_switch_port_set_dynamic_addresses(update->op->nbsp,
1361 ds_cstr(&new_addr));
1362 set_lsp_dynamic_addresses(ds_cstr(&new_addr), update->op);
1363 ds_destroy(&new_addr);
1364 }
1365
1366 static void
1367 build_ipam(struct hmap *datapaths, struct hmap *ports)
1368 {
1369 /* IPAM generally stands for IP address management. In non-virtualized
1370 * world, MAC addresses come with the hardware. But, with virtualized
1371 * workloads, they need to be assigned and managed. This function
1372 * does both IP address management (ipam) and MAC address management
1373 * (macam). */
1374
1375 /* If the switch's other_config:subnet is set, allocate new addresses for
1376 * ports that have the "dynamic" keyword in their addresses column. */
1377 struct ovn_datapath *od;
1378 struct ovs_list updates;
1379
1380 ovs_list_init(&updates);
1381 HMAP_FOR_EACH (od, key_node, datapaths) {
1382 if (!od->nbs) {
1383 continue;
1384 }
1385
1386 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1387 const struct nbrec_logical_switch_port *nbsp = od->nbs->ports[i];
1388
1389 if (!od->ipam_info.allocated_ipv4s &&
1390 !od->ipam_info.ipv6_prefix_set &&
1391 !od->ipam_info.mac_only) {
1392 if (nbsp->dynamic_addresses) {
1393 nbrec_logical_switch_port_set_dynamic_addresses(nbsp,
1394 NULL);
1395 }
1396 continue;
1397 }
1398
1399 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1400 if (!op || op->nbsp != nbsp || op->peer) {
1401 /* Do not allocate addresses for logical switch ports that
1402 * have a peer. */
1403 continue;
1404 }
1405
1406 int num_dynamic_addresses = 0;
1407 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1408 if (!is_dynamic_lsp_address(nbsp->addresses[j])) {
1409 continue;
1410 }
1411 if (num_dynamic_addresses) {
1412 static struct vlog_rate_limit rl
1413 = VLOG_RATE_LIMIT_INIT(1, 1);
1414 VLOG_WARN_RL(&rl, "More than one dynamic address "
1415 "configured for logical switch port '%s'",
1416 nbsp->name);
1417 continue;
1418 }
1419 num_dynamic_addresses++;
1420 struct dynamic_address_update *update
1421 = xzalloc(sizeof *update);
1422 update->op = op;
1423 update->od = od;
1424 if (nbsp->dynamic_addresses) {
1425 bool any_changed;
1426 extract_lsp_addresses(nbsp->dynamic_addresses,
1427 &update->current_addresses);
1428 any_changed = dynamic_addresses_check_for_updates(
1429 nbsp->addresses[j], update);
1430 update_unchanged_dynamic_addresses(update);
1431 if (any_changed) {
1432 ovs_list_push_back(&updates, &update->node);
1433 } else {
1434 /* No changes to dynamic addresses */
1435 set_lsp_dynamic_addresses(nbsp->dynamic_addresses, op);
1436 destroy_lport_addresses(&update->current_addresses);
1437 free(update);
1438 }
1439 } else {
1440 set_dynamic_updates(nbsp->addresses[j], update);
1441 ovs_list_push_back(&updates, &update->node);
1442 }
1443 }
1444
1445 if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
1446 nbrec_logical_switch_port_set_dynamic_addresses(nbsp, NULL);
1447 }
1448 }
1449
1450 }
1451
1452 /* After retaining all unchanged dynamic addresses, now assign
1453 * new ones.
1454 */
1455 struct dynamic_address_update *update;
1456 LIST_FOR_EACH_POP (update, node, &updates) {
1457 update_dynamic_addresses(update);
1458 destroy_lport_addresses(&update->current_addresses);
1459 free(update);
1460 }
1461 }
1462 \f
1463 /* Tag allocation for nested containers.
1464 *
1465 * For a logical switch port with 'parent_name' and a request to allocate tags,
1466 * keeps a track of all allocated tags. */
1467 struct tag_alloc_node {
1468 struct hmap_node hmap_node;
1469 char *parent_name;
1470 unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
1471 };
1472
1473 static void
1474 tag_alloc_destroy(struct hmap *tag_alloc_table)
1475 {
1476 struct tag_alloc_node *node;
1477 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1478 bitmap_free(node->allocated_tags);
1479 free(node->parent_name);
1480 free(node);
1481 }
1482 hmap_destroy(tag_alloc_table);
1483 }
1484
1485 static struct tag_alloc_node *
1486 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1487 {
1488 /* If a node for the 'parent_name' exists, return it. */
1489 struct tag_alloc_node *tag_alloc_node;
1490 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1491 hash_string(parent_name, 0),
1492 tag_alloc_table) {
1493 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1494 return tag_alloc_node;
1495 }
1496 }
1497
1498 /* Create a new node. */
1499 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1500 tag_alloc_node->parent_name = xstrdup(parent_name);
1501 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1502 /* Tag 0 is invalid for nested containers. */
1503 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1504 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1505 hash_string(parent_name, 0));
1506
1507 return tag_alloc_node;
1508 }
1509
1510 static void
1511 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1512 const struct nbrec_logical_switch_port *nbsp)
1513 {
1514 /* Add the tags of already existing nested containers. If there is no
1515 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1516 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1517 return;
1518 }
1519
1520 struct tag_alloc_node *tag_alloc_node;
1521 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1522 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1523 }
1524
1525 static void
1526 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1527 const struct nbrec_logical_switch_port *nbsp)
1528 {
1529 if (!nbsp->tag_request) {
1530 return;
1531 }
1532
1533 if (nbsp->parent_name && nbsp->parent_name[0]
1534 && *nbsp->tag_request == 0) {
1535 /* For nested containers that need allocation, do the allocation. */
1536
1537 if (nbsp->tag) {
1538 /* This has already been allocated. */
1539 return;
1540 }
1541
1542 struct tag_alloc_node *tag_alloc_node;
1543 int64_t tag;
1544 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1545 nbsp->parent_name);
1546 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1547 if (tag == MAX_OVN_TAGS) {
1548 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1549 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1550 "parent %s", nbsp->parent_name);
1551 return;
1552 }
1553 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1554 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1555 } else if (*nbsp->tag_request != 0) {
1556 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1557 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1558 }
1559 }
1560 \f
1561
1562 static void
1563 join_logical_ports(struct northd_context *ctx,
1564 struct hmap *datapaths, struct hmap *ports,
1565 struct hmap *chassis_qdisc_queues,
1566 struct hmap *tag_alloc_table, struct ovs_list *sb_only,
1567 struct ovs_list *nb_only, struct ovs_list *both)
1568 {
1569 hmap_init(ports);
1570 ovs_list_init(sb_only);
1571 ovs_list_init(nb_only);
1572 ovs_list_init(both);
1573
1574 const struct sbrec_port_binding *sb;
1575 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
1576 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
1577 NULL, NULL, sb);
1578 ovs_list_push_back(sb_only, &op->list);
1579 }
1580
1581 struct ovn_datapath *od;
1582 HMAP_FOR_EACH (od, key_node, datapaths) {
1583 if (od->nbs) {
1584 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1585 const struct nbrec_logical_switch_port *nbsp
1586 = od->nbs->ports[i];
1587 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1588 if (op) {
1589 if (op->nbsp || op->nbrp) {
1590 static struct vlog_rate_limit rl
1591 = VLOG_RATE_LIMIT_INIT(5, 1);
1592 VLOG_WARN_RL(&rl, "duplicate logical port %s",
1593 nbsp->name);
1594 continue;
1595 }
1596 op->nbsp = nbsp;
1597 ovs_list_remove(&op->list);
1598
1599 uint32_t queue_id = smap_get_int(&op->sb->options,
1600 "qdisc_queue_id", 0);
1601 if (queue_id && op->sb->chassis) {
1602 add_chassis_queue(
1603 chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
1604 queue_id);
1605 }
1606
1607 ovs_list_push_back(both, &op->list);
1608
1609 /* This port exists due to a SB binding, but should
1610 * not have been initialized fully. */
1611 ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
1612 } else {
1613 op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
1614 ovs_list_push_back(nb_only, &op->list);
1615 }
1616
1617 if (!strcmp(nbsp->type, "localnet")) {
1618 od->localnet_port = op;
1619 }
1620
1621 op->lsp_addrs
1622 = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
1623 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1624 if (!strcmp(nbsp->addresses[j], "unknown")
1625 || !strcmp(nbsp->addresses[j], "router")) {
1626 continue;
1627 }
1628 if (is_dynamic_lsp_address(nbsp->addresses[j])) {
1629 continue;
1630 } else if (!extract_lsp_addresses(nbsp->addresses[j],
1631 &op->lsp_addrs[op->n_lsp_addrs])) {
1632 static struct vlog_rate_limit rl
1633 = VLOG_RATE_LIMIT_INIT(1, 1);
1634 VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
1635 "switch port addresses. No MAC "
1636 "address found",
1637 op->nbsp->addresses[j]);
1638 continue;
1639 }
1640 op->n_lsp_addrs++;
1641 }
1642
1643 op->ps_addrs
1644 = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
1645 for (size_t j = 0; j < nbsp->n_port_security; j++) {
1646 if (!extract_lsp_addresses(nbsp->port_security[j],
1647 &op->ps_addrs[op->n_ps_addrs])) {
1648 static struct vlog_rate_limit rl
1649 = VLOG_RATE_LIMIT_INIT(1, 1);
1650 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
1651 "security. No MAC address found",
1652 op->nbsp->port_security[j]);
1653 continue;
1654 }
1655 op->n_ps_addrs++;
1656 }
1657
1658 op->od = od;
1659 ipam_add_port_addresses(od, op);
1660 tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
1661 }
1662 } else {
1663 for (size_t i = 0; i < od->nbr->n_ports; i++) {
1664 const struct nbrec_logical_router_port *nbrp
1665 = od->nbr->ports[i];
1666
1667 struct lport_addresses lrp_networks;
1668 if (!extract_lrp_networks(nbrp, &lrp_networks)) {
1669 static struct vlog_rate_limit rl
1670 = VLOG_RATE_LIMIT_INIT(5, 1);
1671 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
1672 continue;
1673 }
1674
1675 if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
1676 continue;
1677 }
1678
1679 struct ovn_port *op = ovn_port_find(ports, nbrp->name);
1680 if (op) {
1681 if (op->nbsp || op->nbrp) {
1682 static struct vlog_rate_limit rl
1683 = VLOG_RATE_LIMIT_INIT(5, 1);
1684 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
1685 nbrp->name);
1686 continue;
1687 }
1688 op->nbrp = nbrp;
1689 ovs_list_remove(&op->list);
1690 ovs_list_push_back(both, &op->list);
1691
1692 /* This port exists but should not have been
1693 * initialized fully. */
1694 ovs_assert(!op->lrp_networks.n_ipv4_addrs
1695 && !op->lrp_networks.n_ipv6_addrs);
1696 } else {
1697 op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
1698 ovs_list_push_back(nb_only, &op->list);
1699 }
1700
1701 op->lrp_networks = lrp_networks;
1702 op->od = od;
1703 ipam_add_port_addresses(op->od, op);
1704
1705 const char *redirect_chassis = smap_get(&op->nbrp->options,
1706 "redirect-chassis");
1707 if (redirect_chassis || op->nbrp->n_gateway_chassis) {
1708 /* Additional "derived" ovn_port crp represents the
1709 * instance of op on the "redirect-chassis". */
1710 const char *gw_chassis = smap_get(&op->od->nbr->options,
1711 "chassis");
1712 if (gw_chassis) {
1713 static struct vlog_rate_limit rl
1714 = VLOG_RATE_LIMIT_INIT(1, 1);
1715 VLOG_WARN_RL(&rl, "Bad configuration: "
1716 "redirect-chassis configured on port %s "
1717 "on L3 gateway router", nbrp->name);
1718 continue;
1719 }
1720 if (od->l3dgw_port || od->l3redirect_port) {
1721 static struct vlog_rate_limit rl
1722 = VLOG_RATE_LIMIT_INIT(1, 1);
1723 VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
1724 "with redirect-chassis on same logical "
1725 "router %s", od->nbr->name);
1726 continue;
1727 }
1728
1729 char *redirect_name = chassis_redirect_name(nbrp->name);
1730 struct ovn_port *crp = ovn_port_find(ports, redirect_name);
1731 if (crp) {
1732 crp->derived = true;
1733 crp->nbrp = nbrp;
1734 ovs_list_remove(&crp->list);
1735 ovs_list_push_back(both, &crp->list);
1736 } else {
1737 crp = ovn_port_create(ports, redirect_name,
1738 NULL, nbrp, NULL);
1739 crp->derived = true;
1740 ovs_list_push_back(nb_only, &crp->list);
1741 }
1742 crp->od = od;
1743 free(redirect_name);
1744
1745 /* Set l3dgw_port and l3redirect_port in od, for later
1746 * use during flow creation. */
1747 od->l3dgw_port = op;
1748 od->l3redirect_port = crp;
1749 }
1750 }
1751 }
1752 }
1753
1754 /* Connect logical router ports, and logical switch ports of type "router",
1755 * to their peers. */
1756 struct ovn_port *op;
1757 HMAP_FOR_EACH (op, key_node, ports) {
1758 if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
1759 const char *peer_name = smap_get(&op->nbsp->options, "router-port");
1760 if (!peer_name) {
1761 continue;
1762 }
1763
1764 struct ovn_port *peer = ovn_port_find(ports, peer_name);
1765 if (!peer || !peer->nbrp) {
1766 continue;
1767 }
1768
1769 peer->peer = op;
1770 op->peer = peer;
1771 op->od->router_ports = xrealloc(
1772 op->od->router_ports,
1773 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
1774 op->od->router_ports[op->od->n_router_ports++] = op;
1775
1776 /* Fill op->lsp_addrs for op->nbsp->addresses[] with
1777 * contents "router", which was skipped in the loop above. */
1778 for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
1779 if (!strcmp(op->nbsp->addresses[j], "router")) {
1780 if (extract_lrp_networks(peer->nbrp,
1781 &op->lsp_addrs[op->n_lsp_addrs])) {
1782 op->n_lsp_addrs++;
1783 }
1784 break;
1785 }
1786 }
1787 } else if (op->nbrp && op->nbrp->peer && !op->derived) {
1788 struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
1789 if (peer) {
1790 if (peer->nbrp) {
1791 op->peer = peer;
1792 } else if (peer->nbsp) {
1793 /* An ovn_port for a switch port of type "router" does have
1794 * a router port as its peer (see the case above for
1795 * "router" ports), but this is set via options:router-port
1796 * in Logical_Switch_Port and does not involve the
1797 * Logical_Router_Port's 'peer' column. */
1798 static struct vlog_rate_limit rl =
1799 VLOG_RATE_LIMIT_INIT(5, 1);
1800 VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
1801 "port %s is a switch port", op->key);
1802 }
1803 }
1804 }
1805 }
1806 }
1807
1808 static void
1809 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1810 uint16_t *port, int *addr_family);
1811
1812 static void
1813 get_router_load_balancer_ips(const struct ovn_datapath *od,
1814 struct sset *all_ips, int *addr_family)
1815 {
1816 if (!od->nbr) {
1817 return;
1818 }
1819
1820 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1821 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1822 struct smap *vips = &lb->vips;
1823 struct smap_node *node;
1824
1825 SMAP_FOR_EACH (node, vips) {
1826 /* node->key contains IP:port or just IP. */
1827 char *ip_address = NULL;
1828 uint16_t port;
1829
1830 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
1831 addr_family);
1832 if (!ip_address) {
1833 continue;
1834 }
1835
1836 if (!sset_contains(all_ips, ip_address)) {
1837 sset_add(all_ips, ip_address);
1838 }
1839
1840 free(ip_address);
1841 }
1842 }
1843 }
1844
1845 /* Returns an array of strings, each consisting of a MAC address followed
1846 * by one or more IP addresses, and if the port is a distributed gateway
1847 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1848 * LPORT_NAME is the name of the L3 redirect port or the name of the
1849 * logical_port specified in a NAT rule. These strings include the
1850 * external IP addresses of all NAT rules defined on that router, and all
1851 * of the IP addresses used in load balancer VIPs defined on that router.
1852 *
1853 * The caller must free each of the n returned strings with free(),
1854 * and must free the returned array when it is no longer needed. */
1855 static char **
1856 get_nat_addresses(const struct ovn_port *op, size_t *n)
1857 {
1858 size_t n_nats = 0;
1859 struct eth_addr mac;
1860 if (!op->nbrp || !op->od || !op->od->nbr
1861 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1862 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
1863 *n = n_nats;
1864 return NULL;
1865 }
1866
1867 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1868 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1869 bool central_ip_address = false;
1870
1871 char **addresses;
1872 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
1873
1874 /* Get NAT IP addresses. */
1875 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
1876 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1877 ovs_be32 ip, mask;
1878
1879 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1880 if (error || mask != OVS_BE32_MAX) {
1881 free(error);
1882 continue;
1883 }
1884
1885 /* Determine whether this NAT rule satisfies the conditions for
1886 * distributed NAT processing. */
1887 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1888 && nat->logical_port && nat->external_mac) {
1889 /* Distributed NAT rule. */
1890 if (eth_addr_from_string(nat->external_mac, &mac)) {
1891 struct ds address = DS_EMPTY_INITIALIZER;
1892 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1893 ds_put_format(&address, " %s", nat->external_ip);
1894 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1895 nat->logical_port);
1896 addresses[n_nats++] = ds_steal_cstr(&address);
1897 }
1898 } else {
1899 /* Centralized NAT rule, either on gateway router or distributed
1900 * router. */
1901 ds_put_format(&c_addresses, " %s", nat->external_ip);
1902 central_ip_address = true;
1903 }
1904 }
1905
1906 /* A set to hold all load-balancer vips. */
1907 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1908 int addr_family;
1909 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
1910
1911 const char *ip_address;
1912 SSET_FOR_EACH (ip_address, &all_ips) {
1913 ds_put_format(&c_addresses, " %s", ip_address);
1914 central_ip_address = true;
1915 }
1916 sset_destroy(&all_ips);
1917
1918 if (central_ip_address) {
1919 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1920 * ports should be restricted to the "redirect-chassis". */
1921 if (op->od->l3redirect_port) {
1922 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1923 op->od->l3redirect_port->json_key);
1924 }
1925
1926 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
1927 }
1928
1929 *n = n_nats;
1930
1931 return addresses;
1932 }
1933
1934 static bool
1935 gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc,
1936 const struct sbrec_chassis *nb_gwc_c,
1937 const struct sbrec_gateway_chassis *sb_gwc)
1938 {
1939 bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
1940 && nb_gwc->priority == sb_gwc->priority
1941 && smap_equal(&nb_gwc->options, &sb_gwc->options)
1942 && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
1943
1944 if (!equal) {
1945 return false;
1946 }
1947
1948 /* If everything else matched and we were unable to find the SBDB
1949 * Chassis entry at this time, assume a match and return true.
1950 * This happens when an ovn-controller is restarting and the Chassis
1951 * entry is gone away momentarily */
1952 return !nb_gwc_c
1953 || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
1954 sb_gwc->chassis->name));
1955 }
1956
1957 static bool
1958 sbpb_gw_chassis_needs_update(
1959 struct ovsdb_idl_index *sbrec_chassis_by_name,
1960 const struct sbrec_port_binding *port_binding,
1961 const struct nbrec_logical_router_port *lrp)
1962 {
1963 if (!lrp || !port_binding) {
1964 return false;
1965 }
1966
1967 /* These arrays are used to collect valid Gateway_Chassis and valid
1968 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1969 * we ignore the ones we can't match on the SBDB */
1970 struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis *
1971 sizeof *lrp_gwc);
1972 const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis *
1973 sizeof *lrp_gwc_c);
1974
1975 /* Count the number of gateway chassis chassis names from the logical
1976 * router port that we are able to match on the southbound database */
1977 int lrp_n_gateway_chassis = 0;
1978 int n;
1979 for (n = 0; n < lrp->n_gateway_chassis; n++) {
1980
1981 if (!lrp->gateway_chassis[n]->chassis_name) {
1982 continue;
1983 }
1984
1985 const struct sbrec_chassis *chassis =
1986 chassis_lookup_by_name(sbrec_chassis_by_name,
1987 lrp->gateway_chassis[n]->chassis_name);
1988
1989 lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
1990 lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
1991 lrp_n_gateway_chassis++;
1992 if (!chassis) {
1993 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1994 VLOG_WARN_RL(
1995 &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1996 "on logical router port %s does not exist in SBDB",
1997 lrp->gateway_chassis[n]->chassis_name, lrp->name);
1998 }
1999 }
2000
2001 /* Basic check, different amount of Gateway_Chassis means that we
2002 * need to update southbound database Port_Binding */
2003 if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) {
2004 free(lrp_gwc_c);
2005 free(lrp_gwc);
2006 return true;
2007 }
2008
2009 for (n = 0; n < lrp_n_gateway_chassis; n++) {
2010 int i;
2011 /* For each of the valid gw chassis on the lrp, check if there's
2012 * a match on the Port_Binding list, we assume order is not
2013 * persisted */
2014 for (i = 0; i < port_binding->n_gateway_chassis; i++) {
2015 if (gateway_chassis_equal(lrp_gwc[n],
2016 lrp_gwc_c[n],
2017 port_binding->gateway_chassis[i])) {
2018 break; /* we found a match */
2019 }
2020 }
2021
2022 /* if no Port_Binding gateway chassis matched for the entry... */
2023 if (i == port_binding->n_gateway_chassis) {
2024 free(lrp_gwc_c);
2025 free(lrp_gwc);
2026 return true; /* found no match for this gateway chassis on lrp */
2027 }
2028 }
2029
2030 /* no need for update, all ports matched */
2031 free(lrp_gwc_c);
2032 free(lrp_gwc);
2033 return false;
2034 }
2035
2036 /* This functions translates the gw chassis on the nb database
2037 * to sb database entries, the only difference is that SB database
2038 * Gateway_Chassis table references the chassis directly instead
2039 * of using the name */
2040 static void
2041 copy_gw_chassis_from_nbrp_to_sbpb(
2042 struct northd_context *ctx,
2043 struct ovsdb_idl_index *sbrec_chassis_by_name,
2044 const struct nbrec_logical_router_port *lrp,
2045 const struct sbrec_port_binding *port_binding) {
2046
2047 if (!lrp || !port_binding || !lrp->n_gateway_chassis) {
2048 return;
2049 }
2050
2051 struct sbrec_gateway_chassis **gw_chassis = NULL;
2052 int n_gwc = 0;
2053 int n;
2054
2055 /* XXX: This can be improved. This code will generate a set of new
2056 * Gateway_Chassis and push them all in a single transaction, instead
2057 * this would be more optimal if we just add/update/remove the rows in
2058 * the southbound db that need to change. We don't expect lots of
2059 * changes to the Gateway_Chassis table, but if that proves to be wrong
2060 * we should optimize this. */
2061 for (n = 0; n < lrp->n_gateway_chassis; n++) {
2062 struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n];
2063 if (!lrp_gwc->chassis_name) {
2064 continue;
2065 }
2066
2067 const struct sbrec_chassis *chassis =
2068 chassis_lookup_by_name(sbrec_chassis_by_name,
2069 lrp_gwc->chassis_name);
2070
2071 gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis);
2072
2073 struct sbrec_gateway_chassis *pb_gwc =
2074 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
2075
2076 sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name);
2077 sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority);
2078 sbrec_gateway_chassis_set_chassis(pb_gwc, chassis);
2079 sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options);
2080 sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids);
2081
2082 gw_chassis[n_gwc++] = pb_gwc;
2083 }
2084 sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc);
2085 free(gw_chassis);
2086 }
2087
2088 static void
2089 ovn_port_update_sbrec(struct northd_context *ctx,
2090 struct ovsdb_idl_index *sbrec_chassis_by_name,
2091 const struct ovn_port *op,
2092 struct hmap *chassis_qdisc_queues)
2093 {
2094 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
2095 if (op->nbrp) {
2096 /* If the router is for l3 gateway, it resides on a chassis
2097 * and its port type is "l3gateway". */
2098 const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");
2099 if (op->derived) {
2100 sbrec_port_binding_set_type(op->sb, "chassisredirect");
2101 } else if (chassis_name) {
2102 sbrec_port_binding_set_type(op->sb, "l3gateway");
2103 } else {
2104 sbrec_port_binding_set_type(op->sb, "patch");
2105 }
2106
2107 struct smap new;
2108 smap_init(&new);
2109 if (op->derived) {
2110 const char *redirect_chassis = smap_get(&op->nbrp->options,
2111 "redirect-chassis");
2112 if (op->nbrp->n_gateway_chassis && redirect_chassis) {
2113 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2114 VLOG_WARN_RL(
2115 &rl, "logical router port %s has both options:"
2116 "redirect-chassis and gateway_chassis populated "
2117 "redirect-chassis will be ignored in favour of "
2118 "gateway chassis", op->nbrp->name);
2119 }
2120
2121 if (op->nbrp->n_gateway_chassis) {
2122 if (sbpb_gw_chassis_needs_update(sbrec_chassis_by_name,
2123 op->sb, op->nbrp)) {
2124 copy_gw_chassis_from_nbrp_to_sbpb(ctx,
2125 sbrec_chassis_by_name,
2126 op->nbrp, op->sb);
2127 }
2128
2129 } else if (redirect_chassis) {
2130 /* Handle ports that had redirect-chassis option attached
2131 * to them, and for backwards compatibility convert them
2132 * to a single Gateway_Chassis entry */
2133 const struct sbrec_chassis *chassis =
2134 chassis_lookup_by_name(sbrec_chassis_by_name,
2135 redirect_chassis);
2136 if (chassis) {
2137 /* If we found the chassis, and the gw chassis on record
2138 * differs from what we expect go ahead and update */
2139 if (op->sb->n_gateway_chassis != 1
2140 || !op->sb->gateway_chassis[0]->chassis
2141 || strcmp(op->sb->gateway_chassis[0]->chassis->name,
2142 chassis->name)
2143 || op->sb->gateway_chassis[0]->priority != 0) {
2144 /* Construct a single Gateway_Chassis entry on the
2145 * Port_Binding attached to the redirect_chassis
2146 * name */
2147 struct sbrec_gateway_chassis *gw_chassis =
2148 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
2149
2150 char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
2151 chassis->name);
2152
2153 /* XXX: Again, here, we could just update an existing
2154 * Gateway_Chassis, instead of creating a new one
2155 * and replacing it */
2156 sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
2157 sbrec_gateway_chassis_set_priority(gw_chassis, 0);
2158 sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
2159 sbrec_gateway_chassis_set_external_ids(gw_chassis,
2160 &op->nbrp->external_ids);
2161 sbrec_port_binding_set_gateway_chassis(op->sb,
2162 &gw_chassis, 1);
2163 free(gwc_name);
2164 }
2165 } else {
2166 VLOG_WARN("chassis name '%s' from redirect from logical "
2167 " router port '%s' redirect-chassis not found",
2168 redirect_chassis, op->nbrp->name);
2169 if (op->sb->n_gateway_chassis) {
2170 sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
2171 0);
2172 }
2173 }
2174 }
2175 smap_add(&new, "distributed-port", op->nbrp->name);
2176 } else {
2177 if (op->peer) {
2178 smap_add(&new, "peer", op->peer->key);
2179 }
2180 if (chassis_name) {
2181 smap_add(&new, "l3gateway-chassis", chassis_name);
2182 }
2183 }
2184 sbrec_port_binding_set_options(op->sb, &new);
2185 smap_destroy(&new);
2186
2187 sbrec_port_binding_set_parent_port(op->sb, NULL);
2188 sbrec_port_binding_set_tag(op->sb, NULL, 0);
2189
2190 struct ds s = DS_EMPTY_INITIALIZER;
2191 ds_put_cstr(&s, op->nbrp->mac);
2192 for (int i = 0; i < op->nbrp->n_networks; ++i) {
2193 ds_put_format(&s, " %s", op->nbrp->networks[i]);
2194 }
2195 const char *addresses = ds_cstr(&s);
2196 sbrec_port_binding_set_mac(op->sb, &addresses, 1);
2197 ds_destroy(&s);
2198
2199 struct smap ids = SMAP_INITIALIZER(&ids);
2200 sbrec_port_binding_set_external_ids(op->sb, &ids);
2201
2202 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2203 } else {
2204 if (strcmp(op->nbsp->type, "router")) {
2205 uint32_t queue_id = smap_get_int(
2206 &op->sb->options, "qdisc_queue_id", 0);
2207 bool has_qos = port_has_qos_params(&op->nbsp->options);
2208 struct smap options;
2209
2210 if (op->sb->chassis && has_qos && !queue_id) {
2211 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
2212 op->sb->chassis);
2213 } else if (!has_qos && queue_id) {
2214 free_chassis_queueid(chassis_qdisc_queues,
2215 op->sb->chassis,
2216 queue_id);
2217 queue_id = 0;
2218 }
2219
2220 smap_clone(&options, &op->nbsp->options);
2221 if (queue_id) {
2222 smap_add_format(&options,
2223 "qdisc_queue_id", "%d", queue_id);
2224 }
2225 sbrec_port_binding_set_options(op->sb, &options);
2226 smap_destroy(&options);
2227 if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
2228 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
2229 } else {
2230 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2231 VLOG_WARN_RL(
2232 &rl, "Unknown port type '%s' set on logical switch '%s'.",
2233 op->nbsp->type, op->nbsp->name);
2234 }
2235
2236 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2237 } else {
2238 const char *chassis = NULL;
2239 if (op->peer && op->peer->od && op->peer->od->nbr) {
2240 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
2241 }
2242
2243 /* A switch port connected to a gateway router is also of
2244 * type "l3gateway". */
2245 if (chassis) {
2246 sbrec_port_binding_set_type(op->sb, "l3gateway");
2247 } else {
2248 sbrec_port_binding_set_type(op->sb, "patch");
2249 }
2250
2251 const char *router_port = smap_get(&op->nbsp->options,
2252 "router-port");
2253 if (router_port || chassis) {
2254 struct smap new;
2255 smap_init(&new);
2256 if (router_port) {
2257 smap_add(&new, "peer", router_port);
2258 }
2259 if (chassis) {
2260 smap_add(&new, "l3gateway-chassis", chassis);
2261 }
2262 sbrec_port_binding_set_options(op->sb, &new);
2263 smap_destroy(&new);
2264 } else {
2265 sbrec_port_binding_set_options(op->sb, NULL);
2266 }
2267
2268 const char *nat_addresses = smap_get(&op->nbsp->options,
2269 "nat-addresses");
2270 if (nat_addresses && !strcmp(nat_addresses, "router")) {
2271 if (op->peer && op->peer->od
2272 && (chassis || op->peer->od->l3redirect_port)) {
2273 size_t n_nats;
2274 char **nats = get_nat_addresses(op->peer, &n_nats);
2275 if (n_nats) {
2276 sbrec_port_binding_set_nat_addresses(op->sb,
2277 (const char **) nats, n_nats);
2278 for (size_t i = 0; i < n_nats; i++) {
2279 free(nats[i]);
2280 }
2281 free(nats);
2282 } else {
2283 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2284 }
2285 } else {
2286 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2287 }
2288 /* Only accept manual specification of ethernet address
2289 * followed by IPv4 addresses on type "l3gateway" ports. */
2290 } else if (nat_addresses && chassis) {
2291 struct lport_addresses laddrs;
2292 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
2293 static struct vlog_rate_limit rl =
2294 VLOG_RATE_LIMIT_INIT(1, 1);
2295 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
2296 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2297 } else {
2298 sbrec_port_binding_set_nat_addresses(op->sb,
2299 &nat_addresses, 1);
2300 destroy_lport_addresses(&laddrs);
2301 }
2302 } else {
2303 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2304 }
2305 }
2306 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
2307 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
2308 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
2309 op->nbsp->n_addresses);
2310
2311 struct smap ids = SMAP_INITIALIZER(&ids);
2312 smap_clone(&ids, &op->nbsp->external_ids);
2313 const char *name = smap_get(&ids, "neutron:port_name");
2314 if (name && name[0]) {
2315 smap_add(&ids, "name", name);
2316 }
2317 sbrec_port_binding_set_external_ids(op->sb, &ids);
2318 smap_destroy(&ids);
2319 }
2320 }
2321
2322 /* Remove mac_binding entries that refer to logical_ports which are
2323 * deleted. */
2324 static void
2325 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
2326 {
2327 const struct sbrec_mac_binding *b, *n;
2328 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
2329 if (!ovn_port_find(ports, b->logical_port)) {
2330 sbrec_mac_binding_delete(b);
2331 }
2332 }
2333 }
2334
2335 /* Updates the southbound Port_Binding table so that it contains the logical
2336 * switch ports specified by the northbound database.
2337 *
2338 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2339 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2340 * datapaths. */
2341 static void
2342 build_ports(struct northd_context *ctx,
2343 struct ovsdb_idl_index *sbrec_chassis_by_name,
2344 struct hmap *datapaths, struct hmap *ports)
2345 {
2346 struct ovs_list sb_only, nb_only, both;
2347 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
2348 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
2349
2350 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
2351 &tag_alloc_table, &sb_only, &nb_only, &both);
2352
2353 struct ovn_port *op, *next;
2354 /* For logical ports that are in both databases, update the southbound
2355 * record based on northbound data. Also index the in-use tunnel_keys.
2356 * For logical ports that are in NB database, do any tag allocation
2357 * needed. */
2358 LIST_FOR_EACH_SAFE (op, next, list, &both) {
2359 if (op->nbsp) {
2360 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
2361 }
2362 ovn_port_update_sbrec(ctx, sbrec_chassis_by_name,
2363 op, &chassis_qdisc_queues);
2364
2365 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
2366 if (op->sb->tunnel_key > op->od->port_key_hint) {
2367 op->od->port_key_hint = op->sb->tunnel_key;
2368 }
2369 }
2370
2371 /* Add southbound record for each unmatched northbound record. */
2372 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
2373 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
2374 if (!tunnel_key) {
2375 continue;
2376 }
2377
2378 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
2379 ovn_port_update_sbrec(ctx, sbrec_chassis_by_name, op,
2380 &chassis_qdisc_queues);
2381
2382 sbrec_port_binding_set_logical_port(op->sb, op->key);
2383 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
2384 }
2385
2386 bool remove_mac_bindings = false;
2387 if (!ovs_list_is_empty(&sb_only)) {
2388 remove_mac_bindings = true;
2389 }
2390
2391 /* Delete southbound records without northbound matches. */
2392 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
2393 ovs_list_remove(&op->list);
2394 sbrec_port_binding_delete(op->sb);
2395 ovn_port_destroy(ports, op);
2396 }
2397 if (remove_mac_bindings) {
2398 cleanup_mac_bindings(ctx, ports);
2399 }
2400
2401 tag_alloc_destroy(&tag_alloc_table);
2402 destroy_chassis_queues(&chassis_qdisc_queues);
2403 }
2404 \f
/* Bounds of the key range used for multicast groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group together with its key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Predefined group "_MC_flood". */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = {
    .name = MC_FLOOD,
    .key = 65535,
};

/* Predefined group "_MC_unknown". */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = {
    .name = MC_UNKNOWN,
    .key = 65534,
};
2418
2419 static bool
2420 multicast_group_equal(const struct multicast_group *a,
2421 const struct multicast_group *b)
2422 {
2423 return !strcmp(a->name, b->name) && a->key == b->key;
2424 }
2425
2426 /* Multicast group entry. */
2427 struct ovn_multicast {
2428 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
2429 struct ovn_datapath *datapath;
2430 const struct multicast_group *group;
2431
2432 struct ovn_port **ports;
2433 size_t n_ports, allocated_ports;
2434 };
2435
2436 static uint32_t
2437 ovn_multicast_hash(const struct ovn_datapath *datapath,
2438 const struct multicast_group *group)
2439 {
2440 return hash_pointer(datapath, group->key);
2441 }
2442
2443 static struct ovn_multicast *
2444 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
2445 const struct multicast_group *group)
2446 {
2447 struct ovn_multicast *mc;
2448
2449 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
2450 ovn_multicast_hash(datapath, group), mcgroups) {
2451 if (mc->datapath == datapath
2452 && multicast_group_equal(mc->group, group)) {
2453 return mc;
2454 }
2455 }
2456 return NULL;
2457 }
2458
2459 static void
2460 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
2461 struct ovn_port *port)
2462 {
2463 struct ovn_datapath *od = port->od;
2464 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
2465 if (!mc) {
2466 mc = xmalloc(sizeof *mc);
2467 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
2468 mc->datapath = od;
2469 mc->group = group;
2470 mc->n_ports = 0;
2471 mc->allocated_ports = 4;
2472 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
2473 }
2474 if (mc->n_ports >= mc->allocated_ports) {
2475 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
2476 sizeof *mc->ports);
2477 }
2478 mc->ports[mc->n_ports++] = port;
2479 }
2480
2481 static void
2482 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
2483 {
2484 if (mc) {
2485 hmap_remove(mcgroups, &mc->hmap_node);
2486 free(mc->ports);
2487 free(mc);
2488 }
2489 }
2490
2491 static void
2492 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
2493 const struct sbrec_multicast_group *sb)
2494 {
2495 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
2496 for (size_t i = 0; i < mc->n_ports; i++) {
2497 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2498 }
2499 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2500 free(ports);
2501 }
2502 \f
2503 /* Logical flow generation.
2504 *
2505 * This code generates the Logical_Flow table in the southbound database, as a
2506 * function of most of the northbound database.
2507 */
2508
2509 struct ovn_lflow {
2510 struct hmap_node hmap_node;
2511
2512 struct ovn_datapath *od;
2513 enum ovn_stage stage;
2514 uint16_t priority;
2515 char *match;
2516 char *actions;
2517 char *stage_hint;
2518 const char *where;
2519 };
2520
2521 static size_t
2522 ovn_lflow_hash(const struct ovn_lflow *lflow)
2523 {
2524 return ovn_logical_flow_hash(&lflow->od->sb->header_.uuid,
2525 ovn_stage_get_table(lflow->stage),
2526 ovn_stage_get_pipeline_name(lflow->stage),
2527 lflow->priority, lflow->match,
2528 lflow->actions);
2529 }
2530
2531 static bool
2532 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2533 {
2534 return (a->od == b->od
2535 && a->stage == b->stage
2536 && a->priority == b->priority
2537 && !strcmp(a->match, b->match)
2538 && !strcmp(a->actions, b->actions));
2539 }
2540
2541 static void
2542 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
2543 enum ovn_stage stage, uint16_t priority,
2544 char *match, char *actions, char *stage_hint,
2545 const char *where)
2546 {
2547 lflow->od = od;
2548 lflow->stage = stage;
2549 lflow->priority = priority;
2550 lflow->match = match;
2551 lflow->actions = actions;
2552 lflow->stage_hint = stage_hint;
2553 lflow->where = where;
2554 }
2555
2556 /* Adds a row with the specified contents to the Logical_Flow table. */
2557 static void
2558 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2559 enum ovn_stage stage, uint16_t priority,
2560 const char *match, const char *actions,
2561 const char *stage_hint, const char *where)
2562 {
2563 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2564
2565 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
2566 ovn_lflow_init(lflow, od, stage, priority,
2567 xstrdup(match), xstrdup(actions),
2568 nullable_xstrdup(stage_hint), where);
2569 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2570 }
2571
/* Adds a logical flow with a stage hint, recording the source location of
 * the macro invocation as the flow's 'where'. */
#define ovn_lflow_add_with_hint(FLOWS, DATAPATH, STAGE, PRIO, MATCH_STR, \
                                ACTIONS_STR, HINT)                       \
    ovn_lflow_add_at(FLOWS, DATAPATH, STAGE, PRIO, MATCH_STR,            \
                     ACTIONS_STR, HINT, OVS_SOURCE_LOCATOR)

/* Same as ovn_lflow_add_with_hint(), with no stage hint. */
#define ovn_lflow_add(FLOWS, DATAPATH, STAGE, PRIO, MATCH_STR, ACTIONS_STR) \
    ovn_lflow_add_at(FLOWS, DATAPATH, STAGE, PRIO, MATCH_STR,               \
                     ACTIONS_STR, NULL, OVS_SOURCE_LOCATOR)
2581
2582 static struct ovn_lflow *
2583 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
2584 enum ovn_stage stage, uint16_t priority,
2585 const char *match, const char *actions, uint32_t hash)
2586 {
2587 struct ovn_lflow target;
2588 ovn_lflow_init(&target, od, stage, priority,
2589 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2590 NULL, NULL);
2591
2592 struct ovn_lflow *lflow;
2593 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, hash, lflows) {
2594 if (ovn_lflow_equal(lflow, &target)) {
2595 return lflow;
2596 }
2597 }
2598 return NULL;
2599 }
2600
2601 static void
2602 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2603 {
2604 if (lflow) {
2605 hmap_remove(lflows, &lflow->hmap_node);
2606 free(lflow->match);
2607 free(lflow->actions);
2608 free(lflow->stage_hint);
2609 free(lflow);
2610 }
2611 }
2612
2613 /* Appends port security constraints on L2 address field 'eth_addr_field'
2614 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2615 * elements, is the collection of port_security constraints from an
2616 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2617 static void
2618 build_port_security_l2(const char *eth_addr_field,
2619 struct lport_addresses *ps_addrs,
2620 unsigned int n_ps_addrs,
2621 struct ds *match)
2622 {
2623 if (!n_ps_addrs) {
2624 return;
2625 }
2626
2627 ds_put_format(match, " && %s == {", eth_addr_field);
2628
2629 for (size_t i = 0; i < n_ps_addrs; i++) {
2630 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
2631 }
2632 ds_chomp(match, ' ');
2633 ds_put_cstr(match, "}");
2634 }
2635
2636 static void
2637 build_port_security_ipv6_nd_flow(
2638 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
2639 int n_ipv6_addrs)
2640 {
2641 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
2642 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
2643 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
2644 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
2645 ETH_ADDR_ARGS(ea));
2646 if (!n_ipv6_addrs) {
2647 ds_put_cstr(match, "))");
2648 return;
2649 }
2650
2651 char ip6_str[INET6_ADDRSTRLEN + 1];
2652 struct in6_addr lla;
2653 in6_generate_lla(ea, &lla);
2654 memset(ip6_str, 0, sizeof(ip6_str));
2655 ipv6_string_mapped(ip6_str, &lla);
2656 ds_put_format(match, " && (nd.target == %s", ip6_str);
2657
2658 for(int i = 0; i < n_ipv6_addrs; i++) {
2659 memset(ip6_str, 0, sizeof(ip6_str));
2660 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2661 ds_put_format(match, " || nd.target == %s", ip6_str);
2662 }
2663
2664 ds_put_format(match, ")))");
2665 }
2666
2667 static void
2668 build_port_security_ipv6_flow(
2669 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
2670 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
2671 {
2672 char ip6_str[INET6_ADDRSTRLEN + 1];
2673
2674 ds_put_format(match, " && %s == {",
2675 pipeline == P_IN ? "ip6.src" : "ip6.dst");
2676
2677 /* Allow link-local address. */
2678 struct in6_addr lla;
2679 in6_generate_lla(ea, &lla);
2680 ipv6_string_mapped(ip6_str, &lla);
2681 ds_put_format(match, "%s, ", ip6_str);
2682
2683 /* Allow ip6.dst=ff00::/8 for multicast packets */
2684 if (pipeline == P_OUT) {
2685 ds_put_cstr(match, "ff00::/8, ");
2686 }
2687 for(int i = 0; i < n_ipv6_addrs; i++) {
2688 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2689 ds_put_format(match, "%s, ", ip6_str);
2690 }
2691 /* Replace ", " by "}". */
2692 ds_chomp(match, ' ');
2693 ds_chomp(match, ',');
2694 ds_put_cstr(match, "}");
2695 }
2696
2697 /**
2698 * Build port security constraints on ARP and IPv6 ND fields
2699 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2700 *
2701 * For each port security of the logical port, following
2702 * logical flows are added
2703 * - If the port security has no IP (both IPv4 and IPv6) or
2704 * if it has IPv4 address(es)
2705 * - Priority 90 flow to allow ARP packets for known MAC addresses
2706 * in the eth.src and arp.spa fields. If the port security
2707 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2708 *
2709 * - If the port security has no IP (both IPv4 and IPv6) or
2710 * if it has IPv6 address(es)
2711 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2712 * in the eth.src and nd.sll/nd.tll fields. If the port security
2713 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2714 * for IPv6 Neighbor Advertisement packet.
2715 *
2716 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2717 */
2718 static void
2719 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
2720 {
2721 struct ds match = DS_EMPTY_INITIALIZER;
2722
2723 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2724 struct lport_addresses *ps = &op->ps_addrs[i];
2725
2726 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
2727
2728 ds_clear(&match);
2729 if (ps->n_ipv4_addrs || no_ip) {
2730 ds_put_format(&match,
2731 "inport == %s && eth.src == %s && arp.sha == %s",
2732 op->json_key, ps->ea_s, ps->ea_s);
2733
2734 if (ps->n_ipv4_addrs) {
2735 ds_put_cstr(&match, " && arp.spa == {");
2736 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
2737 /* When the netmask is applied, if the host portion is
2738 * non-zero, the host can only use the specified
2739 * address in the arp.spa. If zero, the host is allowed
2740 * to use any address in the subnet. */
2741 if (ps->ipv4_addrs[j].plen == 32
2742 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
2743 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
2744 } else {
2745 ds_put_format(&match, "%s/%d",
2746 ps->ipv4_addrs[j].network_s,
2747 ps->ipv4_addrs[j].plen);
2748 }
2749 ds_put_cstr(&match, ", ");
2750 }
2751 ds_chomp(&match, ' ');
2752 ds_chomp(&match, ',');
2753 ds_put_cstr(&match, "}");
2754 }
2755 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2756 ds_cstr(&match), "next;");
2757 }
2758
2759 if (ps->n_ipv6_addrs || no_ip) {
2760 ds_clear(&match);
2761 ds_put_format(&match, "inport == %s && eth.src == %s",
2762 op->json_key, ps->ea_s);
2763 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
2764 ps->n_ipv6_addrs);
2765 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2766 ds_cstr(&match), "next;");
2767 }
2768 }
2769
2770 ds_clear(&match);
2771 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
2772 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
2773 ds_cstr(&match), "drop;");
2774 ds_destroy(&match);
2775 }
2776
2777 /**
2778 * Build port security constraints on IPv4 and IPv6 src and dst fields
2779 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2780 *
2781 * For each port security of the logical port, following
2782 * logical flows are added
2783 * - If the port security has IPv4 addresses,
2784 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2785 *
2786 * - If the port security has IPv6 addresses,
2787 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2788 *
2789 * - If the port security has IPv4 addresses or IPv6 addresses or both
2790 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2791 */
2792 static void
2793 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
2794 struct hmap *lflows)
2795 {
2796 char *port_direction;
2797 enum ovn_stage stage;
2798 if (pipeline == P_IN) {
2799 port_direction = "inport";
2800 stage = S_SWITCH_IN_PORT_SEC_IP;
2801 } else {
2802 port_direction = "outport";
2803 stage = S_SWITCH_OUT_PORT_SEC_IP;
2804 }
2805
2806 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2807 struct lport_addresses *ps = &op->ps_addrs[i];
2808
2809 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
2810 continue;
2811 }
2812
2813 if (ps->n_ipv4_addrs) {
2814 struct ds match = DS_EMPTY_INITIALIZER;
2815 if (pipeline == P_IN) {
2816 /* Permit use of the unspecified address for DHCP discovery */
2817 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
2818 ds_put_format(&dhcp_match, "inport == %s"
2819 " && eth.src == %s"
2820 " && ip4.src == 0.0.0.0"
2821 " && ip4.dst == 255.255.255.255"
2822 " && udp.src == 68 && udp.dst == 67",
2823 op->json_key, ps->ea_s);
2824 ovn_lflow_add(lflows, op->od, stage, 90,
2825 ds_cstr(&dhcp_match), "next;");
2826 ds_destroy(&dhcp_match);
2827 ds_put_format(&match, "inport == %s && eth.src == %s"
2828 " && ip4.src == {", op->json_key,
2829 ps->ea_s);
2830 } else {
2831 ds_put_format(&match, "outport == %s && eth.dst == %s"
2832 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2833 op->json_key, ps->ea_s);
2834 }
2835
2836 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
2837 ovs_be32 mask = ps->ipv4_addrs[j].mask;
2838 /* When the netmask is applied, if the host portion is
2839 * non-zero, the host can only use the specified
2840 * address. If zero, the host is allowed to use any
2841 * address in the subnet.
2842 */
2843 if (ps->ipv4_addrs[j].plen == 32
2844 || ps->ipv4_addrs[j].addr & ~mask) {
2845 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
2846 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
2847 /* Host is also allowed to receive packets to the
2848 * broadcast address in the specified subnet. */
2849 ds_put_format(&match, ", %s",
2850 ps->ipv4_addrs[j].bcast_s);
2851 }
2852 } else {
2853 /* host portion is zero */
2854 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
2855 ps->ipv4_addrs[j].plen);
2856 }
2857 ds_put_cstr(&match, ", ");
2858 }
2859
2860 /* Replace ", " by "}". */
2861 ds_chomp(&match, ' ');
2862 ds_chomp(&match, ',');
2863 ds_put_cstr(&match, "}");
2864 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
2865 ds_destroy(&match);
2866 }
2867
2868 if (ps->n_ipv6_addrs) {
2869 struct ds match = DS_EMPTY_INITIALIZER;
2870 if (pipeline == P_IN) {
2871 /* Permit use of unspecified address for duplicate address
2872 * detection */
2873 struct ds dad_match = DS_EMPTY_INITIALIZER;
2874 ds_put_format(&dad_match, "inport == %s"
2875 " && eth.src == %s"
2876 " && ip6.src == ::"
2877 " && ip6.dst == ff02::/16"
2878 " && icmp6.type == {131, 135, 143}", op->json_key,
2879 ps->ea_s);
2880 ovn_lflow_add(lflows, op->od, stage, 90,
2881 ds_cstr(&dad_match), "next;");
2882 ds_destroy(&dad_match);
2883 }
2884 ds_put_format(&match, "%s == %s && %s == %s",
2885 port_direction, op->json_key,
2886 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
2887 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
2888 ps->ipv6_addrs, ps->n_ipv6_addrs);
2889 ovn_lflow_add(lflows, op->od, stage, 90,
2890 ds_cstr(&match), "next;");
2891 ds_destroy(&match);
2892 }
2893
2894 char *match = xasprintf("%s == %s && %s == %s && ip",
2895 port_direction, op->json_key,
2896 pipeline == P_IN ? "eth.src" : "eth.dst",
2897 ps->ea_s);
2898 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2899 free(match);
2900 }
2901
2902 }
2903
2904 static bool
2905 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2906 {
2907 return !lsp->enabled || *lsp->enabled;
2908 }
2909
2910 static bool
2911 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2912 {
2913 return !lsp->up || *lsp->up;
2914 }
2915
2916 static bool
2917 build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
2918 struct ds *options_action, struct ds *response_action,
2919 struct ds *ipv4_addr_match)
2920 {
2921 if (!op->nbsp->dhcpv4_options) {
2922 /* CMS has disabled native DHCPv4 for this lport. */
2923 return false;
2924 }
2925
2926 ovs_be32 host_ip, mask;
2927 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2928 &mask);
2929 if (error || ((offer_ip ^ host_ip) & mask)) {
2930 /* Either
2931 * - cidr defined is invalid or
2932 * - the offer ip of the logical port doesn't belong to the cidr
2933 * defined in the DHCPv4 options.
2934 * */
2935 free(error);
2936 return false;
2937 }
2938
2939 const char *server_ip = smap_get(
2940 &op->nbsp->dhcpv4_options->options, "server_id");
2941 const char *server_mac = smap_get(
2942 &op->nbsp->dhcpv4_options->options, "server_mac");
2943 const char *lease_time = smap_get(
2944 &op->nbsp->dhcpv4_options->options, "lease_time");
2945
2946 if (!(server_ip && server_mac && lease_time)) {
2947 /* "server_id", "server_mac" and "lease_time" should be
2948 * present in the dhcp_options. */
2949 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2950 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2951 op->json_key);
2952 return false;
2953 }
2954
2955 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2956 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2957
2958 /* server_mac is not DHCPv4 option, delete it from the smap. */
2959 smap_remove(&dhcpv4_options, "server_mac");
2960 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2961 smap_add(&dhcpv4_options, "netmask", netmask);
2962 free(netmask);
2963
2964 ds_put_format(options_action,
2965 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2966 IP_FMT", ", IP_ARGS(offer_ip));
2967
2968 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2969 * options on different architectures (big or little endian, SSE4.2) */
2970 const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
2971 for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
2972 const struct smap_node *node = sorted_opts[i];
2973 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2974 }
2975 free(sorted_opts);
2976
2977 ds_chomp(options_action, ' ');
2978 ds_chomp(options_action, ',');
2979 ds_put_cstr(options_action, "); next;");
2980
2981 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2982 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
2983 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2984 "output;",
2985 server_mac, IP_ARGS(offer_ip), server_ip);
2986
2987 ds_put_format(ipv4_addr_match,
2988 "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
2989 IP_ARGS(offer_ip), server_ip);
2990 smap_destroy(&dhcpv4_options);
2991 return true;
2992 }
2993
2994 static bool
2995 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2996 struct ds *options_action, struct ds *response_action)
2997 {
2998 if (!op->nbsp->dhcpv6_options) {
2999 /* CMS has disabled native DHCPv6 for this lport. */
3000 return false;
3001 }
3002
3003 struct in6_addr host_ip, mask;
3004
3005 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
3006 &mask);
3007 if (error) {
3008 free(error);
3009 return false;
3010 }
3011 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
3012 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
3013 if (!ipv6_mask_is_any(&ip6_mask)) {
3014 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
3015 * options.*/
3016 return false;
3017 }
3018
3019 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
3020 /* "server_id" should be the MAC address. */
3021 const char *server_mac = smap_get(options_map, "server_id");
3022 struct eth_addr ea;
3023 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
3024 /* "server_id" should be present in the dhcpv6_options. */
3025 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3026 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
3027 " for lport %s", op->json_key);
3028 return false;
3029 }
3030
3031 /* Get the link local IP of the DHCPv6 server from the server MAC. */
3032 struct in6_addr lla;
3033 in6_generate_lla(ea, &lla);
3034
3035 char server_ip[INET6_ADDRSTRLEN + 1];
3036 ipv6_string_mapped(server_ip, &lla);
3037
3038 char ia_addr[INET6_ADDRSTRLEN + 1];
3039 ipv6_string_mapped(ia_addr, offer_ip);
3040
3041 ds_put_format(options_action,
3042 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
3043
3044 /* Check whether the dhcpv6 options should be configured as stateful.
3045 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
3046 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
3047 ipv6_string_mapped(ia_addr, offer_ip);
3048 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
3049 }
3050
3051 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
3052 * options on different architectures (big or little endian, SSE4.2) */
3053 const struct smap_node **sorted_opts = smap_sort(options_map);
3054 for (size_t i = 0; i < smap_count(options_map); i++) {
3055 const struct smap_node *node = sorted_opts[i];
3056 if (strcmp(node->key, "dhcpv6_stateless")) {
3057 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
3058 }
3059 }
3060 free(sorted_opts);
3061
3062 ds_chomp(options_action, ' ');
3063 ds_chomp(options_action, ',');
3064 ds_put_cstr(options_action, "); next;");
3065
3066 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
3067 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
3068 "udp.dst = 546; outport = inport; flags.loopback = 1; "
3069 "output;",
3070 server_mac, server_ip);
3071
3072 return true;
3073 }
3074
3075 struct ovn_port_group_ls {
3076 struct hmap_node key_node; /* Index on 'key'. */
3077 struct uuid key; /* nb_ls->header_.uuid. */
3078 const struct nbrec_logical_switch *nb_ls;
3079 };
3080
3081 struct ovn_port_group {
3082 struct hmap_node key_node; /* Index on 'key'. */
3083 struct uuid key; /* nb_pg->header_.uuid. */
3084 const struct nbrec_port_group *nb_pg;
3085 struct hmap nb_lswitches; /* NB lswitches related to the port group */
3086 };
3087
3088 static void
3089 ovn_port_group_ls_add(struct ovn_port_group *pg,
3090 const struct nbrec_logical_switch *nb_ls)
3091 {
3092 struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
3093 pg_ls->key = nb_ls->header_.uuid;
3094 pg_ls->nb_ls = nb_ls;
3095 hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
3096 }
3097
3098 static struct ovn_port_group_ls *
3099 ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
3100 {
3101 struct ovn_port_group_ls *pg_ls;
3102
3103 HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
3104 &pg->nb_lswitches) {
3105 if (uuid_equals(ls_uuid, &pg_ls->key)) {
3106 return pg_ls;
3107 }
3108 }
3109 return NULL;
3110 }
3111
3112 struct ovn_ls_port_group {
3113 struct hmap_node key_node; /* Index on 'key'. */
3114 struct uuid key; /* nb_pg->header_.uuid. */
3115 const struct nbrec_port_group *nb_pg;
3116 };
3117
3118 static void
3119 ovn_ls_port_group_add(struct hmap *nb_pgs,
3120 const struct nbrec_port_group *nb_pg)
3121 {
3122 struct ovn_ls_port_group *ls_pg = xzalloc(sizeof *ls_pg);
3123 ls_pg->key = nb_pg->header_.uuid;
3124 ls_pg->nb_pg = nb_pg;
3125 hmap_insert(nb_pgs, &ls_pg->key_node, uuid_hash(&ls_pg->key));
3126 }
3127
3128 static void
3129 ovn_ls_port_group_destroy(struct hmap *nb_pgs)
3130 {
3131 struct ovn_ls_port_group *ls_pg;
3132 HMAP_FOR_EACH_POP (ls_pg, key_node, nb_pgs) {
3133 free(ls_pg);
3134 }
3135 hmap_destroy(nb_pgs);
3136 }
3137
3138 static bool
3139 has_stateful_acl(struct ovn_datapath *od)
3140 {
3141 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3142 struct nbrec_acl *acl = od->nbs->acls[i];
3143 if (!strcmp(acl->action, "allow-related")) {
3144 return true;
3145 }
3146 }
3147
3148 struct ovn_ls_port_group *ls_pg;
3149 HMAP_FOR_EACH (ls_pg, key_node, &od->nb_pgs) {
3150 for (size_t i = 0; i < ls_pg->nb_pg->n_acls; i++) {
3151 struct nbrec_acl *acl = ls_pg->nb_pg->acls[i];
3152 if (!strcmp(acl->action, "allow-related")) {
3153 return true;
3154 }
3155 }
3156 }
3157
3158 return false;
3159 }
3160
3161 static void
3162 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
3163 {
3164 bool has_stateful = has_stateful_acl(od);
3165
3166 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
3167 * allowed by default. */
3168 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
3169 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
3170
3171 /* If there are any stateful ACL rules in this datapath, we must
3172 * send all IP packets through the conntrack action, which handles
3173 * defragmentation, in order to match L4 headers. */
3174 if (has_stateful) {
3175 for (size_t i = 0; i < od->n_router_ports; i++) {
3176 struct ovn_port *op = od->router_ports[i];
3177 /* Can't use ct() for router ports. Consider the
3178 * following configuration: lp1(10.0.0.2) on
3179 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
3180 * ping from lp1 to lp2, First, the response will go
3181 * through ct() with a zone for lp2 in the ls2 ingress
3182 * pipeline on hostB. That ct zone knows about this
3183 * connection. Next, it goes through ct() with the zone
3184 * for the router port in the egress pipeline of ls2 on
3185 * hostB. This zone does not know about the connection,
3186 * as the icmp request went through the logical router
3187 * on hostA, not hostB. This would only work with
3188 * distributed conntrack state across all chassis. */
3189 struct ds match_in = DS_EMPTY_INITIALIZER;
3190 struct ds match_out = DS_EMPTY_INITIALIZER;
3191
3192 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
3193 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
3194 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3195 ds_cstr(&match_in), "next;");
3196 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3197 ds_cstr(&match_out), "next;");
3198
3199 ds_destroy(&match_in);
3200 ds_destroy(&match_out);
3201 }
3202 if (od->localnet_port) {
3203 struct ds match_in = DS_EMPTY_INITIALIZER;
3204 struct ds match_out = DS_EMPTY_INITIALIZER;
3205
3206 ds_put_format(&match_in, "ip && inport == %s",
3207 od->localnet_port->json_key);
3208 ds_put_format(&match_out, "ip && outport == %s",
3209 od->localnet_port->json_key);
3210 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3211 ds_cstr(&match_in), "next;");
3212 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3213 ds_cstr(&match_out), "next;");
3214
3215 ds_destroy(&match_in);
3216 ds_destroy(&match_out);
3217 }
3218
3219 /* Ingress and Egress Pre-ACL Table (Priority 110).
3220 *
3221 * Not to do conntrack on ND and ICMP destination
3222 * unreachable packets. */
3223 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3224 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
3225 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
3226 "next;");
3227 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3228 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
3229 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
3230 "next;");
3231
3232 /* Ingress and Egress Pre-ACL Table (Priority 100).
3233 *
3234 * Regardless of whether the ACL is "from-lport" or "to-lport",
3235 * we need rules in both the ingress and egress table, because
3236 * the return traffic needs to be followed.
3237 *
3238 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3239 * it to conntrack for tracking and defragmentation. */
3240 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
3241 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3242 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
3243 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3244 }
3245 }
3246
3247 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
3248 * 'ip_address'. The caller must free() the memory allocated for
3249 * 'ip_address'. */
3250 static void
3251 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
3252 uint16_t *port, int *addr_family)
3253 {
3254 struct sockaddr_storage ss;
3255 if (!inet_parse_active(key, 0, &ss, false)) {
3256 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3257 VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
3258 key);
3259 return;
3260 }
3261
3262 struct ds s = DS_EMPTY_INITIALIZER;
3263 ss_format_address_nobracks(&ss, &s);
3264 *ip_address = ds_steal_cstr(&s);
3265
3266 *port = ss_get_port(&ss);
3267
3268 *addr_family = ss.ss_family;
3269 }
3270
3271 /*
3272 * Returns true if logical switch is configured with DNS records, false
3273 * otherwise.
3274 */
3275 static bool
3276 ls_has_dns_records(const struct nbrec_logical_switch *nbs)
3277 {
3278 for (size_t i = 0; i < nbs->n_dns_records; i++) {
3279 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
3280 return true;
3281 }
3282 }
3283
3284 return false;
3285 }
3286
3287 static void
3288 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
3289 {
3290 /* Do not send ND packets to conntrack */
3291 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
3292 "nd || nd_rs || nd_ra", "next;");
3293 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
3294 "nd || nd_rs || nd_ra", "next;");
3295
3296 /* Allow all packets to go to next tables by default. */
3297 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
3298 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
3299
3300 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3301 bool vip_configured = false;
3302 int addr_family = AF_INET;
3303 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3304 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3305 struct smap *vips = &lb->vips;
3306 struct smap_node *node;
3307
3308 SMAP_FOR_EACH (node, vips) {
3309 vip_configured = true;
3310
3311 /* node->key contains IP:port or just IP. */
3312 char *ip_address = NULL;
3313 uint16_t port;
3314 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3315 &addr_family);
3316 if (!ip_address) {
3317 continue;
3318 }
3319
3320 if (!sset_contains(&all_ips, ip_address)) {
3321 sset_add(&all_ips, ip_address);
3322 }
3323
3324 free(ip_address);
3325
3326 /* Ignore L4 port information in the key because fragmented packets
3327 * may not have L4 information. The pre-stateful table will send
3328 * the packet through ct() action to de-fragment. In stateful
3329 * table, we will eventually look at L4 information. */
3330 }
3331 }
3332
3333 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3334 * packet to conntrack for defragmentation. */
3335 const char *ip_address;
3336 SSET_FOR_EACH(ip_address, &all_ips) {
3337 char *match;
3338
3339 if (addr_family == AF_INET) {
3340 match = xasprintf("ip && ip4.dst == %s", ip_address);
3341 } else {
3342 match = xasprintf("ip && ip6.dst == %s", ip_address);
3343 }
3344 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
3345 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3346 free(match);
3347 }
3348
3349 sset_destroy(&all_ips);
3350
3351 if (vip_configured) {
3352 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
3353 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3354 }
3355 }
3356
3357 static void
3358 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
3359 {
3360 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3361 * allowed by default. */
3362 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
3363 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
3364
3365 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3366 * sent to conntrack for tracking and defragmentation. */
3367 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
3368 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3369 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
3370 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3371 }
3372
3373 static void
3374 build_acl_log(struct ds *actions, const struct nbrec_acl *acl)
3375 {
3376 if (!acl->log) {
3377 return;
3378 }
3379
3380 ds_put_cstr(actions, "log(");
3381
3382 if (acl->name) {
3383 ds_put_format(actions, "name=\"%s\", ", acl->name);
3384 }
3385
3386 /* If a severity level isn't specified, default to "info". */
3387 if (acl->severity) {
3388 ds_put_format(actions, "severity=%s, ", acl->severity);
3389 } else {
3390 ds_put_format(actions, "severity=info, ");
3391 }
3392
3393 if (!strcmp(acl->action, "drop")) {
3394 ds_put_cstr(actions, "verdict=drop, ");
3395 } else if (!strcmp(acl->action, "reject")) {
3396 ds_put_cstr(actions, "verdict=reject, ");
3397 } else if (!strcmp(acl->action, "allow")
3398 || !strcmp(acl->action, "allow-related")) {
3399 ds_put_cstr(actions, "verdict=allow, ");
3400 }
3401
3402 if (acl->meter) {
3403 ds_put_format(actions, "meter=\"%s\", ", acl->meter);
3404 }
3405
3406 ds_chomp(actions, ' ');
3407 ds_chomp(actions, ',');
3408 ds_put_cstr(actions, "); ");
3409 }
3410
3411 static void
3412 build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
3413 enum ovn_stage stage, struct nbrec_acl *acl,
3414 struct ds *extra_match, struct ds *extra_actions)
3415 {
3416 struct ds match = DS_EMPTY_INITIALIZER;
3417 struct ds actions = DS_EMPTY_INITIALIZER;
3418 bool ingress = (stage == S_SWITCH_IN_ACL);
3419
3420 /* TCP */
3421 build_acl_log(&actions, acl);
3422 if (extra_match->length > 0) {
3423 ds_put_format(&match, "(%s) && ", extra_match->string);
3424 }
3425 ds_put_format(&match, "ip4 && tcp && (%s)", acl->match);
3426 ds_put_format(&actions, "reg0 = 0; "
3427 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3428 "tcp_reset { outport <-> inport; %s };",
3429 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3430 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
3431 ds_cstr(&match), ds_cstr(&actions));
3432 ds_clear(&match);
3433 ds_clear(&actions);
3434 build_acl_log(&actions, acl);
3435 if (extra_match->length > 0) {
3436 ds_put_format(&match, "(%s) && ", extra_match->string);
3437 }
3438 ds_put_format(&match, "ip6 && tcp && (%s)", acl->match);
3439 ds_put_format(&actions, "reg0 = 0; "
3440 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3441 "tcp_reset { outport <-> inport; %s };",
3442 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3443 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
3444 ds_cstr(&match), ds_cstr(&actions));
3445
3446 /* IP traffic */
3447 ds_clear(&match);
3448 ds_clear(&actions);
3449 build_acl_log(&actions, acl);
3450 if (extra_match->length > 0) {
3451 ds_put_format(&match, "(%s) && ", extra_match->string);
3452 }
3453 ds_put_format(&match, "ip4 && (%s)", acl->match);
3454 if (extra_actions->length > 0) {
3455 ds_put_format(&actions, "%s ", extra_actions->string);
3456 }
3457 ds_put_format(&actions, "reg0 = 0; "
3458 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3459 "icmp4 { outport <-> inport; %s };",
3460 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3461 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
3462 ds_cstr(&match), ds_cstr(&actions));
3463 ds_clear(&match);
3464 ds_clear(&actions);
3465 build_acl_log(&actions, acl);
3466 if (extra_match->length > 0) {
3467 ds_put_format(&match, "(%s) && ", extra_match->string);
3468 }
3469 ds_put_format(&match, "ip6 && (%s)", acl->match);
3470 if (extra_actions->length > 0) {
3471 ds_put_format(&actions, "%s ", extra_actions->string);
3472 }
3473 ds_put_format(&actions, "reg0 = 0; icmp6 { "
3474 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3475 "outport <-> inport; %s };",
3476 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3477 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
3478 ds_cstr(&match), ds_cstr(&actions));
3479
3480 ds_destroy(&match);
3481 ds_destroy(&actions);
3482 }
3483
3484 static void
3485 consider_acl(struct hmap *lflows, struct ovn_datapath *od,
3486 struct nbrec_acl *acl, bool has_stateful)
3487 {
3488 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
3489 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
3490
3491 char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
3492 if (!strcmp(acl->action, "allow")
3493 || !strcmp(acl->action, "allow-related")) {
3494 /* If there are any stateful flows, we must even commit "allow"
3495 * actions. This is because, while the initiater's
3496 * direction may not have any stateful rules, the server's
3497 * may and then its return traffic would not have an
3498 * associated conntrack entry and would return "+invalid". */
3499 if (!has_stateful) {
3500 struct ds actions = DS_EMPTY_INITIALIZER;
3501 build_acl_log(&actions, acl);
3502 ds_put_cstr(&actions, "next;");
3503 ovn_lflow_add_with_hint(lflows, od, stage,
3504 acl->priority + OVN_ACL_PRI_OFFSET,
3505 acl->match, ds_cstr(&actions),
3506 stage_hint);
3507 ds_destroy(&actions);
3508 } else {
3509 struct ds match = DS_EMPTY_INITIALIZER;
3510 struct ds actions = DS_EMPTY_INITIALIZER;
3511
3512 /* Commit the connection tracking entry if it's a new
3513 * connection that matches this ACL. After this commit,
3514 * the reply traffic is allowed by a flow we create at
3515 * priority 65535, defined earlier.
3516 *
3517 * It's also possible that a known connection was marked for
3518 * deletion after a policy was deleted, but the policy was
3519 * re-added while that connection is still known. We catch
3520 * that case here and un-set ct_label.blocked (which will be done
3521 * by ct_commit in the "stateful" stage) to indicate that the
3522 * connection should be allowed to resume.
3523 */
3524 ds_put_format(&match, "((ct.new && !ct.est)"
3525 " || (!ct.new && ct.est && !ct.rpl "
3526 "&& ct_label.blocked == 1)) "
3527 "&& (%s)", acl->match);
3528 ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
3529 build_acl_log(&actions, acl);
3530 ds_put_cstr(&actions, "next;");
3531 ovn_lflow_add_with_hint(lflows, od, stage,
3532 acl->priority + OVN_ACL_PRI_OFFSET,
3533 ds_cstr(&match),
3534 ds_cstr(&actions),
3535 stage_hint);
3536
3537 /* Match on traffic in the request direction for an established
3538 * connection tracking entry that has not been marked for
3539 * deletion. There is no need to commit here, so we can just
3540 * proceed to the next table. We use this to ensure that this
3541 * connection is still allowed by the currently defined
3542 * policy. */
3543 ds_clear(&match);
3544 ds_clear(&actions);
3545 ds_put_format(&match,
3546 "!ct.new && ct.est && !ct.rpl"
3547 " && ct_label.blocked == 0 && (%s)",
3548 acl->match);
3549
3550 build_acl_log(&actions, acl);
3551 ds_put_cstr(&actions, "next;");
3552 ovn_lflow_add_with_hint(lflows, od, stage,
3553 acl->priority + OVN_ACL_PRI_OFFSET,
3554 ds_cstr(&match), ds_cstr(&actions),
3555 stage_hint);
3556
3557 ds_destroy(&match);
3558 ds_destroy(&actions);
3559 }
3560 } else if (!strcmp(acl->action, "drop")
3561 || !strcmp(acl->action, "reject")) {
3562 struct ds match = DS_EMPTY_INITIALIZER;
3563 struct ds actions = DS_EMPTY_INITIALIZER;
3564
3565 /* The implementation of "drop" differs if stateful ACLs are in
3566 * use for this datapath. In that case, the actions differ
3567 * depending on whether the connection was previously committed
3568 * to the connection tracker with ct_commit. */
3569 if (has_stateful) {
3570 /* If the packet is not part of an established connection, then
3571 * we can simply reject/drop it. */
3572 ds_put_cstr(&match,
3573 "(!ct.est || (ct.est && ct_label.blocked == 1))");
3574 if (!strcmp(acl->action, "reject")) {
3575 build_reject_acl_rules(od, lflows, stage, acl, &match,
3576 &actions);
3577 } else {
3578 ds_put_format(&match, " && (%s)", acl->match);
3579 build_acl_log(&actions, acl);
3580 ds_put_cstr(&actions, "/* drop */");
3581 ovn_lflow_add(lflows, od, stage,
3582 acl->priority + OVN_ACL_PRI_OFFSET,
3583 ds_cstr(&match), ds_cstr(&actions));
3584 }
3585 /* For an existing connection without ct_label set, we've
3586 * encountered a policy change. ACLs previously allowed
3587 * this connection and we committed the connection tracking
3588 * entry. Current policy says that we should drop this
3589 * connection. First, we set bit 0 of ct_label to indicate
3590 * that this connection is set for deletion. By not
3591 * specifying "next;", we implicitly drop the packet after
3592 * updating conntrack state. We would normally defer
3593 * ct_commit() to the "stateful" stage, but since we're
3594 * rejecting/dropping the packet, we go ahead and do it here.
3595 */
3596 ds_clear(&match);
3597 ds_clear(&actions);
3598 ds_put_cstr(&match, "ct.est && ct_label.blocked == 0");
3599 ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
3600 if (!strcmp(acl->action, "reject")) {
3601 build_reject_acl_rules(od, lflows, stage, acl, &match,
3602 &actions);
3603 } else {
3604 ds_put_format(&match, " && (%s)", acl->match);
3605 build_acl_log(&actions, acl);
3606 ds_put_cstr(&actions, "/* drop */");
3607 ovn_lflow_add(lflows, od, stage,
3608 acl->priority + OVN_ACL_PRI_OFFSET,
3609 ds_cstr(&match), ds_cstr(&actions));
3610 }
3611 } else {
3612 /* There are no stateful ACLs in use on this datapath,
3613 * so a "reject/drop" ACL is simply the "reject/drop"
3614 * logical flow action in all cases. */
3615 if (!strcmp(acl->action, "reject")) {
3616 build_reject_acl_rules(od, lflows, stage, acl, &match,
3617 &actions);
3618 } else {
3619 build_acl_log(&actions, acl);
3620 ds_put_cstr(&actions, "/* drop */");
3621 ovn_lflow_add(lflows, od, stage,
3622 acl->priority + OVN_ACL_PRI_OFFSET,
3623 acl->match, ds_cstr(&actions));
3624 }
3625 }
3626 ds_destroy(&match);
3627 ds_destroy(&actions);
3628 }
3629 free(stage_hint);
3630 }
3631
3632 static struct ovn_port_group *
3633 ovn_port_group_create(struct hmap *pgs,
3634 const struct nbrec_port_group *nb_pg)
3635 {
3636 struct ovn_port_group *pg = xzalloc(sizeof *pg);
3637 pg->key = nb_pg->header_.uuid;
3638 pg->nb_pg = nb_pg;
3639 hmap_init(&pg->nb_lswitches);
3640 hmap_insert(pgs, &pg->key_node, uuid_hash(&pg->key));
3641 return pg;
3642 }
3643
3644 static void
3645 ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
3646 {
3647 if (pg) {
3648 hmap_remove(pgs, &pg->key_node);
3649 struct ovn_port_group_ls *ls;
3650 HMAP_FOR_EACH_POP (ls, key_node, &pg->nb_lswitches) {
3651 free(ls);
3652 }
3653 hmap_destroy(&pg->nb_lswitches);
3654 free(pg);
3655 }
3656 }
3657
3658 static void
3659 build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
3660 struct hmap *ports)
3661 {
3662 hmap_init(pgs);
3663
3664 const struct nbrec_port_group *nb_pg;
3665 NBREC_PORT_GROUP_FOR_EACH (nb_pg, ctx->ovnnb_idl) {
3666 struct ovn_port_group *pg = ovn_port_group_create(pgs, nb_pg);
3667 for (size_t i = 0; i < nb_pg->n_ports; i++) {
3668 struct ovn_port *op = ovn_port_find(ports, nb_pg->ports[i]->name);
3669 if (!op) {
3670 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3671 VLOG_ERR_RL(&rl, "lport %s in port group %s not found.",
3672 nb_pg->ports[i]->name,
3673 nb_pg->name);
3674 continue;
3675 }
3676
3677 if (!op->od->nbs) {
3678 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3679 VLOG_WARN_RL(&rl, "lport %s in port group %s has no lswitch.",
3680 nb_pg->ports[i]->name,
3681 nb_pg->name);
3682 continue;
3683 }
3684
3685 struct ovn_port_group_ls *pg_ls =
3686 ovn_port_group_ls_find(pg, &op->od->nbs->header_.uuid);
3687 if (!pg_ls) {
3688 ovn_port_group_ls_add(pg, op->od->nbs);
3689 ovn_ls_port_group_add(&op->od->nb_pgs, nb_pg);
3690 }
3691 }
3692 }
3693 }
3694
3695 static void
3696 build_acls(struct ovn_datapath *od, struct hmap *lflows,
3697 struct hmap *port_groups)
3698 {
3699 bool has_stateful = has_stateful_acl(od);
3700
3701 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
3702 * default. A related rule at priority 1 is added below if there
3703 * are any stateful ACLs in this datapath. */
3704 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
3705 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
3706
3707 if (has_stateful) {
3708 /* Ingress and Egress ACL Table (Priority 1).
3709 *
3710 * By default, traffic is allowed. This is partially handled by
3711 * the Priority 0 ACL flows added earlier, but we also need to
3712 * commit IP flows. This is because, while the initiater's
3713 * direction may not have any stateful rules, the server's may
3714 * and then its return traffic would not have an associated
3715 * conntrack entry and would return "+invalid".
3716 *
3717 * We use "ct_commit" for a connection that is not already known
3718 * by the connection tracker. Once a connection is committed,
3719 * subsequent packets will hit the flow at priority 0 that just
3720 * uses "next;"
3721 *
3722 * We also check for established connections that have ct_label.blocked
3723 * set on them. That's a connection that was disallowed, but is
3724 * now allowed by policy again since it hit this default-allow flow.
3725 * We need to set ct_label.blocked=0 to let the connection continue,
3726 * which will be done by ct_commit() in the "stateful" stage.
3727 * Subsequent packets will hit the flow at priority 0 that just
3728 * uses "next;". */
3729 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
3730 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3731 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3732 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
3733 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3734 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3735
3736 /* Ingress and Egress ACL Table (Priority 65535).
3737 *
3738 * Always drop traffic that's in an invalid state. Also drop
3739 * reply direction packets for connections that have been marked
3740 * for deletion (bit 0 of ct_label is set).
3741 *
3742 * This is enforced at a higher priority than ACLs can be defined. */
3743 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3744 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3745 "drop;");
3746 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3747 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3748 "drop;");
3749
3750 /* Ingress and Egress ACL Table (Priority 65535).
3751 *
3752 * Allow reply traffic that is part of an established
3753 * conntrack entry that has not been marked for deletion
3754 * (bit 0 of ct_label). We only match traffic in the
3755 * reply direction because we want traffic in the request
3756 * direction to hit the currently defined policy from ACLs.
3757 *
3758 * This is enforced at a higher priority than ACLs can be defined. */
3759 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3760 "ct.est && !ct.rel && !ct.new && !ct.inv "
3761 "&& ct.rpl && ct_label.blocked == 0",
3762 "next;");
3763 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3764 "ct.est && !ct.rel && !ct.new && !ct.inv "
3765 "&& ct.rpl && ct_label.blocked == 0",
3766 "next;");
3767
3768 /* Ingress and Egress ACL Table (Priority 65535).
3769 *
3770 * Allow traffic that is related to an existing conntrack entry that
3771 * has not been marked for deletion (bit 0 of ct_label).
3772 *
3773 * This is enforced at a higher priority than ACLs can be defined.
3774 *
3775 * NOTE: This does not support related data sessions (eg,
3776 * a dynamically negotiated FTP data channel), but will allow
3777 * related traffic such as an ICMP Port Unreachable through
3778 * that's generated from a non-listening UDP port. */
3779 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3780 "!ct.est && ct.rel && !ct.new && !ct.inv "
3781 "&& ct_label.blocked == 0",
3782 "next;");
3783 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3784 "!ct.est && ct.rel && !ct.new && !ct.inv "
3785 "&& ct_label.blocked == 0",
3786 "next;");
3787
3788 /* Ingress and Egress ACL Table (Priority 65535).
3789 *
3790 * Not to do conntrack on ND packets. */
3791 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
3792 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
3793 }
3794
3795 /* Ingress or Egress ACL Table (Various priorities). */
3796 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3797 struct nbrec_acl *acl = od->nbs->acls[i];
3798 consider_acl(lflows, od, acl, has_stateful);
3799 }
3800 struct ovn_port_group *pg;
3801 HMAP_FOR_EACH (pg, key_node, port_groups) {
3802 if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
3803 for (size_t i = 0; i < pg->nb_pg->n_acls; i++) {
3804 consider_acl(lflows, od, pg->nb_pg->acls[i], has_stateful);
3805 }
3806 }
3807 }
3808
3809 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
3810 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3811 * */
3812 for (size_t i = 0; i < od->nbs->n_ports; i++) {
3813 if (od->nbs->ports[i]->dhcpv4_options) {
3814 const char *server_id = smap_get(
3815 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
3816 const char *server_mac = smap_get(
3817 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
3818 const char *lease_time = smap_get(
3819 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
3820 if (server_id && server_mac && lease_time) {
3821 struct ds match = DS_EMPTY_INITIALIZER;
3822 const char *actions =
3823 has_stateful ? "ct_commit; next;" : "next;";
3824 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3825 "&& ip4.src == %s && udp && udp.src == 67 "
3826 "&& udp.dst == 68", od->nbs->ports[i]->name,
3827 server_mac, server_id);
3828 ovn_lflow_add(
3829 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3830 actions);
3831 ds_destroy(&match);
3832 }
3833 }
3834
3835 if (od->nbs->ports[i]->dhcpv6_options) {
3836 const char *server_mac = smap_get(
3837 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
3838 struct eth_addr ea;
3839 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
3840 /* Get the link local IP of the DHCPv6 server from the
3841 * server MAC. */
3842 struct in6_addr lla;
3843 in6_generate_lla(ea, &lla);
3844
3845 char server_ip[INET6_ADDRSTRLEN + 1];
3846 ipv6_string_mapped(server_ip, &lla);
3847
3848 struct ds match = DS_EMPTY_INITIALIZER;
3849 const char *actions = has_stateful ? "ct_commit; next;" :
3850 "next;";
3851 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3852 "&& ip6.src == %s && udp && udp.src == 547 "
3853 "&& udp.dst == 546", od->nbs->ports[i]->name,
3854 server_mac, server_ip);
3855 ovn_lflow_add(
3856 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3857 actions);
3858 ds_destroy(&match);
3859 }
3860 }
3861 }
3862
3863 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3864 * if the CMS has configured DNS records for the datapath.
3865 */
3866 if (ls_has_dns_records(od->nbs)) {
3867 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
3868 ovn_lflow_add(
3869 lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
3870 actions);
3871 }
3872 }
3873
3874 static void
3875 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3876 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3877 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3878 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_METER, 0, "1", "next;");
3879 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_METER, 0, "1", "next;");
3880
3881 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3882 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3883 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3884 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3885 int64_t rate = 0;
3886 int64_t burst = 0;
3887
3888 for (size_t j = 0; j < qos->n_action; j++) {
3889 if (!strcmp(qos->key_action[j], "dscp")) {
3890 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3891
3892 ds_put_format(&dscp_action, "ip.dscp = %"PRId64"; next;",
3893 qos->value_action[j]);
3894 ovn_lflow_add(lflows, od, stage,
3895 qos->priority,
3896 qos->match, ds_cstr(&dscp_action));
3897 ds_destroy(&dscp_action);
3898 }
3899 }
3900
3901 for (size_t n = 0; n < qos->n_bandwidth; n++) {
3902 if (!strcmp(qos->key_bandwidth[n], "rate")) {
3903 rate = qos->value_bandwidth[n];
3904 } else if (!strcmp(qos->key_bandwidth[n], "burst")) {
3905 burst = qos->value_bandwidth[n];
3906 }
3907 }
3908 if (rate) {
3909 struct ds meter_action = DS_EMPTY_INITIALIZER;
3910 stage = ingress ? S_SWITCH_IN_QOS_METER : S_SWITCH_OUT_QOS_METER;
3911 if (burst) {
3912 ds_put_format(&meter_action,
3913 "set_meter(%"PRId64", %"PRId64"); next;",
3914 rate, burst);
3915 } else {
3916 ds_put_format(&meter_action,
3917 "set_meter(%"PRId64"); next;",
3918 rate);
3919 }
3920
3921 /* Ingress and Egress QoS Meter Table.
3922 *
3923 * We limit the bandwidth of this flow by adding a meter table.
3924 */
3925 ovn_lflow_add(lflows, od, stage,
3926 qos->priority,
3927 qos->match, ds_cstr(&meter_action));
3928 ds_destroy(&meter_action);
3929 }
3930 }
3931 }
3932
3933 static void
3934 build_lb(struct ovn_datapath *od, struct hmap *lflows)
3935 {
3936 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3937 * default. */
3938 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3939 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3940
3941 if (od->nbs->load_balancer) {
3942 /* Ingress and Egress LB Table (Priority 65535).
3943 *
3944 * Send established traffic through conntrack for just NAT. */
3945 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3946 "ct.est && !ct.rel && !ct.new && !ct.inv",
3947 REGBIT_CONNTRACK_NAT" = 1; next;");
3948 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3949 "ct.est && !ct.rel && !ct.new && !ct.inv",
3950 REGBIT_CONNTRACK_NAT" = 1; next;");
3951 }
3952 }
3953
3954 static void
3955 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3956 {
3957 /* Ingress and Egress stateful Table (Priority 0): Packets are
3958 * allowed by default. */
3959 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3960 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3961
3962 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3963 * committed to conntrack. We always set ct_label.blocked to 0 here as
3964 * any packet that makes it this far is part of a connection we
3965 * want to allow to continue. */
3966 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3967 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3968 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3969 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3970
3971 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3972 * through nat (without committing).
3973 *
3974 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3975 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3976 * don't overlap.
3977 */
3978 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3979 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3980 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3981 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3982
3983 /* Load balancing rules for new connections get committed to conntrack
3984 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3985 * a higher priority rule for load balancing below also commits the
3986 * connection, so it is okay if we do not hit the above match on
3987 * REGBIT_CONNTRACK_COMMIT. */
3988 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3989 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3990 struct smap *vips = &lb->vips;
3991 struct smap_node *node;
3992
3993 SMAP_FOR_EACH (node, vips) {
3994 uint16_t port = 0;
3995 int addr_family;
3996
3997 /* node->key contains IP:port or just IP. */
3998 char *ip_address = NULL;
3999 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
4000 &addr_family);
4001 if (!ip_address) {
4002 continue;
4003 }
4004
4005 /* New connections in Ingress table. */
4006 char *action = xasprintf("ct_lb(%s);", node->value);
4007 struct ds match = DS_EMPTY_INITIALIZER;
4008 if (addr_family == AF_INET) {
4009 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
4010 } else {
4011 ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
4012 }
4013 if (port) {
4014 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
4015 ds_put_format(&match, " && udp.dst == %d", port);
4016 } else {
4017 ds_put_format(&match, " && tcp.dst == %d", port);
4018 }
4019 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
4020 120, ds_cstr(&match), action);
4021 } else {
4022 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
4023 110, ds_cstr(&match), action);
4024 }
4025
4026 free(ip_address);
4027 ds_destroy(&match);
4028 free(action);
4029 }
4030 }
4031 }
4032
4033 static void
4034 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
4035 struct hmap *port_groups, struct hmap *lflows,
4036 struct hmap *mcgroups)
4037 {
4038 /* This flow table structure is documented in ovn-northd(8), so please
4039 * update ovn-northd.8.xml if you change anything. */
4040
4041 struct ds match = DS_EMPTY_INITIALIZER;
4042 struct ds actions = DS_EMPTY_INITIALIZER;
4043
4044 /* Build pre-ACL and ACL tables for both ingress and egress.
4045 * Ingress tables 3 through 10. Egress tables 0 through 7. */
4046 struct ovn_datapath *od;
4047 HMAP_FOR_EACH (od, key_node, datapaths) {
4048 if (!od->nbs) {
4049 continue;
4050 }
4051
4052 build_pre_acls(od, lflows);
4053 build_pre_lb(od, lflows);
4054 build_pre_stateful(od, lflows);
4055 build_acls(od, lflows, port_groups);
4056 build_qos(od, lflows);
4057 build_lb(od, lflows);
4058 build_stateful(od, lflows);
4059 }
4060
4061 /* Logical switch ingress table 0: Admission control framework (priority
4062 * 100). */
4063 HMAP_FOR_EACH (od, key_node, datapaths) {
4064 if (!od->nbs) {
4065 continue;
4066 }
4067
4068 /* Logical VLANs not supported. */
4069 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
4070 "drop;");
4071
4072 /* Broadcast/multicast source address is invalid. */
4073 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
4074 "drop;");
4075
4076 /* Port security flows have priority 50 (see below) and will continue
4077 * to the next table if packet source is acceptable. */
4078 }
4079
4080 /* Logical switch ingress table 0: Ingress port security - L2
4081 * (priority 50).
4082 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
4083 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
4084 */
4085 struct ovn_port *op;
4086 HMAP_FOR_EACH (op, key_node, ports) {
4087 if (!op->nbsp) {
4088 continue;
4089 }
4090
4091 if (!lsp_is_enabled(op->nbsp)) {
4092 /* Drop packets from disabled logical ports (since logical flow
4093 * tables are default-drop). */
4094 continue;
4095 }
4096
4097 ds_clear(&match);
4098 ds_clear(&actions);
4099 ds_put_format(&match, "inport == %s", op->json_key);
4100 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
4101 &match);
4102
4103 const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
4104 if (queue_id) {
4105 ds_put_format(&actions, "set_queue(%s); ", queue_id);
4106 }
4107 ds_put_cstr(&actions, "next;");
4108 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
4109 ds_cstr(&match), ds_cstr(&actions));
4110
4111 if (op->nbsp->n_port_security) {
4112 build_port_security_ip(P_IN, op, lflows);
4113 build_port_security_nd(op, lflows);
4114 }
4115 }
4116
4117 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
4118 * (priority 0)*/
4119 HMAP_FOR_EACH (od, key_node, datapaths) {
4120 if (!od->nbs) {
4121 continue;
4122 }
4123
4124 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
4125 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
4126 }
4127
4128 /* Ingress table 11: ARP/ND responder, skip requests coming from localnet
4129 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
4130 * rationale. */
4131 HMAP_FOR_EACH (op, key_node, ports) {
4132 if (!op->nbsp) {
4133 continue;
4134 }
4135
4136 if ((!strcmp(op->nbsp->type, "localnet")) ||
4137 (!strcmp(op->nbsp->type, "vtep"))) {
4138 ds_clear(&match);
4139 ds_put_format(&match, "inport == %s", op->json_key);
4140 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
4141 ds_cstr(&match), "next;");
4142 }
4143 }
4144
4145 /* Ingress table 11: ARP/ND responder, reply for known IPs.
4146 * (priority 50). */
4147 HMAP_FOR_EACH (op, key_node, ports) {
4148 if (!op->nbsp) {
4149 continue;
4150 }
4151
4152 /*
4153 * Add ARP/ND reply flows if either the
4154 * - port is up or
4155 * - port type is router or
4156 * - port type is localport
4157 */
4158 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
4159 strcmp(op->nbsp->type, "localport")) {
4160 continue;
4161 }
4162
4163 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4164 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4165 ds_clear(&match);
4166 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
4167 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
4168 ds_clear(&actions);
4169 ds_put_format(&actions,
4170 "eth.dst = eth.src; "
4171 "eth.src = %s; "
4172 "arp.op = 2; /* ARP reply */ "
4173 "arp.tha = arp.sha; "
4174 "arp.sha = %s; "
4175 "arp.tpa = arp.spa; "
4176 "arp.spa = %s; "
4177 "outport = inport; "
4178 "flags.loopback = 1; "
4179 "output;",
4180 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
4181 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
4182 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
4183 ds_cstr(&match), ds_cstr(&actions));
4184
4185 /* Do not reply to an ARP request from the port that owns the
4186 * address (otherwise a DHCP client that ARPs to check for a
4187 * duplicate address will fail). Instead, forward it the usual
4188 * way.
4189 *
4190 * (Another alternative would be to simply drop the packet. If
4191 * everything is working as it is configured, then this would
4192 * produce equivalent results, since no one should reply to the
4193 * request. But ARPing for one's own IP address is intended to
4194 * detect situations where the network is not working as
4195 * configured, so dropping the request would frustrate that
4196 * intent.) */
4197 ds_put_format(&match, " && inport == %s", op->json_key);
4198 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
4199 ds_cstr(&match), "next;");
4200 }
4201
4202 /* For ND solicitations, we need to listen for both the
4203 * unicast IPv6 address and its all-nodes multicast address,
4204 * but always respond with the unicast IPv6 address. */
4205 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4206 ds_clear(&match);
4207 ds_put_format(&match,
4208 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
4209 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
4210 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
4211 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
4212
4213 ds_clear(&actions);
4214 ds_put_format(&actions,
4215 "%s { "
4216 "eth.src = %s; "
4217 "ip6.src = %s; "
4218 "nd.target = %s; "
4219 "nd.tll = %s; "
4220 "outport = inport; "
4221 "flags.loopback = 1; "
4222 "output; "
4223 "};",
4224 !strcmp(op->nbsp->type, "router") ?
4225 "nd_na_router" : "nd_na",
4226 op->lsp_addrs[i].ea_s,
4227 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
4228 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
4229 op->lsp_addrs[i].ea_s);
4230 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
4231 ds_cstr(&match), ds_cstr(&actions));
4232
4233 /* Do not reply to a solicitation from the port that owns the
4234 * address (otherwise DAD detection will fail). */
4235 ds_put_format(&match, " && inport == %s", op->json_key);
4236 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
4237 ds_cstr(&match), "next;");
4238 }
4239 }
4240 }
4241
4242 /* Ingress table 11: ARP/ND responder, by default goto next.
4243 * (priority 0)*/
4244 HMAP_FOR_EACH (od, key_node, datapaths) {
4245 if (!od->nbs) {
4246 continue;
4247 }
4248
4249 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
4250 }
4251
4252 /* Logical switch ingress table 12 and 13: DHCP options and response
4253 * priority 100 flows. */
4254 HMAP_FOR_EACH (op, key_node, ports) {
4255 if (!op->nbsp) {
4256 continue;
4257 }
4258
4259 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
4260 /* Don't add the DHCP flows if the port is not enabled or if the
4261 * port is a router port. */
4262 continue;
4263 }
4264
4265 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
4266 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
4267 */
4268 continue;
4269 }
4270
4271 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4272 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4273 struct ds options_action = DS_EMPTY_INITIALIZER;
4274 struct ds response_action = DS_EMPTY_INITIALIZER;
4275 struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
4276 if (build_dhcpv4_action(
4277 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
4278 &options_action, &response_action, &ipv4_addr_match)) {
4279 ds_clear(&match);
4280 ds_put_format(
4281 &match, "inport == %s && eth.src == %s && "
4282 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
4283 "udp.src == 68 && udp.dst == 67", op->json_key,
4284 op->lsp_addrs[i].ea_s);
4285
4286 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
4287 100, ds_cstr(&match),
4288 ds_cstr(&options_action));
4289 ds_clear(&match);
4290 /* Allow ip4.src = OFFER_IP and
4291 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
4292 * cases
4293 * - When the client wants to renew the IP by sending
4294 * the DHCPREQUEST to the server ip.
4295 * - When the client wants to renew the IP by
4296 * broadcasting the DHCPREQUEST.
4297 */
4298 ds_put_format(
4299 &match, "inport == %s && eth.src == %s && "
4300 "%s && udp.src == 68 && udp.dst == 67", op->json_key,
4301 op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));
4302
4303 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
4304 100, ds_cstr(&match),
4305 ds_cstr(&options_action));
4306 ds_clear(&match);
4307
4308 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
4309 * put_dhcp_opts action is successful. */
4310 ds_put_format(
4311 &match, "inport == %s && eth.src == %s && "
4312 "ip4 && udp.src == 68 && udp.dst == 67"
4313 " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
4314 op->lsp_addrs[i].ea_s);
4315 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
4316 100, ds_cstr(&match),
4317 ds_cstr(&response_action));
4318 ds_destroy(&options_action);
4319 ds_destroy(&response_action);
4320 ds_destroy(&ipv4_addr_match);
4321 break;
4322 }
4323 }
4324
4325 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4326 struct ds options_action = DS_EMPTY_INITIALIZER;
4327 struct ds response_action = DS_EMPTY_INITIALIZER;
4328 if (build_dhcpv6_action(
4329 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
4330 &options_action, &response_action)) {
4331 ds_clear(&match);
4332 ds_put_format(
4333 &match, "inport == %s && eth.src == %s"
4334 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
4335 " udp.dst == 547", op->json_key,
4336 op->lsp_addrs[i].ea_s);
4337
4338 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
4339 ds_cstr(&match), ds_cstr(&options_action));
4340
4341 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
4342 * put_dhcpv6_opts action is successful */
4343 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
4344 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
4345 ds_cstr(&match), ds_cstr(&response_action));
4346 ds_destroy(&options_action);
4347 ds_destroy(&response_action);
4348 break;
4349 }
4350 }
4351 }
4352 }
4353
4354 /* Logical switch ingress table 14 and 15: DNS lookup and response
4355 * priority 100 flows.
4356 */
4357 HMAP_FOR_EACH (od, key_node, datapaths) {
4358 if (!od->nbs || !ls_has_dns_records(od->nbs)) {
4359 continue;
4360 }
4361
4362 struct ds action = DS_EMPTY_INITIALIZER;
4363
4364 ds_clear(&match);
4365 ds_put_cstr(&match, "udp.dst == 53");
4366 ds_put_format(&action,
4367 REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
4368 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
4369 ds_cstr(&match), ds_cstr(&action));
4370 ds_clear(&action);
4371 ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
4372 ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
4373 "udp.dst = udp.src; udp.src = 53; outport = inport; "
4374 "flags.loopback = 1; output;");
4375 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
4376 ds_cstr(&match), ds_cstr(&action));
4377 ds_clear(&action);
4378 ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
4379 "udp.dst = udp.src; udp.src = 53; outport = inport; "
4380 "flags.loopback = 1; output;");
4381 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
4382 ds_cstr(&match), ds_cstr(&action));
4383 ds_destroy(&action);
4384 }
4385
4386 /* Ingress table 12 and 13: DHCP options and response, by default goto
4387 * next. (priority 0).
4388 * Ingress table 14 and 15: DNS lookup and response, by default goto next.
4389 * (priority 0).*/
4390
4391 HMAP_FOR_EACH (od, key_node, datapaths) {
4392 if (!od->nbs) {
4393 continue;
4394 }
4395
4396 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
4397 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
4398 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
4399 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
4400 }
4401
4402 /* Ingress table 16: Destination lookup, broadcast and multicast handling
4403 * (priority 100). */
4404 HMAP_FOR_EACH (op, key_node, ports) {
4405 if (!op->nbsp) {
4406 continue;
4407 }
4408
4409 if (lsp_is_enabled(op->nbsp)) {
4410 ovn_multicast_add(mcgroups, &mc_flood, op);
4411 }
4412 }
4413 HMAP_FOR_EACH (od, key_node, datapaths) {
4414 if (!od->nbs) {
4415 continue;
4416 }
4417
4418 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
4419 "outport = \""MC_FLOOD"\"; output;");
4420 }
4421
4422 /* Ingress table 16: Destination lookup, unicast handling (priority 50), */
4423 HMAP_FOR_EACH (op, key_node, ports) {
4424 if (!op->nbsp) {
4425 continue;
4426 }
4427
4428 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
4429 /* Addresses are owned by the logical port.
4430 * Ethernet address followed by zero or more IPv4
4431 * or IPv6 addresses (or both). */
4432 struct eth_addr mac;
4433 if (ovs_scan(op->nbsp->addresses[i],
4434 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4435 ds_clear(&match);
4436 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4437 ETH_ADDR_ARGS(mac));
4438
4439 ds_clear(&actions);
4440 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4441 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4442 ds_cstr(&match), ds_cstr(&actions));
4443 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
4444 if (lsp_is_enabled(op->nbsp)) {
4445 ovn_multicast_add(mcgroups, &mc_unknown, op);
4446 op->od->has_unknown = true;
4447 }
4448 } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
4449 if (!op->nbsp->dynamic_addresses
4450 || !ovs_scan(op->nbsp->dynamic_addresses,
4451 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4452 continue;
4453 }
4454 ds_clear(&match);
4455 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4456 ETH_ADDR_ARGS(mac));
4457
4458 ds_clear(&actions);
4459 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4460 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4461 ds_cstr(&match), ds_cstr(&actions));
4462 } else if (!strcmp(op->nbsp->addresses[i], "router")) {
4463 if (!op->peer || !op->peer->nbrp
4464 || !ovs_scan(op->peer->nbrp->mac,
4465 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4466 continue;
4467 }
4468 ds_clear(&match);
4469 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4470 ETH_ADDR_ARGS(mac));
4471 if (op->peer->od->l3dgw_port
4472 && op->peer->od->l3redirect_port
4473 && op->od->localnet_port) {
4474 bool add_chassis_resident_check = false;
4475 if (op->peer == op->peer->od->l3dgw_port) {
4476 /* The peer of this port represents a distributed
4477 * gateway port. The destination lookup flow for the
4478 * router's distributed gateway port MAC address should
4479 * only be programmed on the "redirect-chassis". */
4480 add_chassis_resident_check = true;
4481 } else {
4482 /* Check if the option 'reside-on-redirect-chassis'
4483 * is set to true on the peer port. If set to true
4484 * and if the logical switch has a localnet port, it
4485 * means the router pipeline for the packets from
4486 * this logical switch should be run on the chassis
4487 * hosting the gateway port.
4488 */
4489 add_chassis_resident_check = smap_get_bool(
4490 &op->peer->nbrp->options,
4491 "reside-on-redirect-chassis", false);
4492 }
4493
4494 if (add_chassis_resident_check) {
4495 ds_put_format(&match, " && is_chassis_resident(%s)",
4496 op->peer->od->l3redirect_port->json_key);
4497 }
4498 }
4499
4500 ds_clear(&actions);
4501 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4502 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4503 ds_cstr(&match), ds_cstr(&actions));
4504
4505 /* Add ethernet addresses specified in NAT rules on
4506 * distributed logical routers. */
4507 if (op->peer->od->l3dgw_port
4508 && op->peer == op->peer->od->l3dgw_port) {
4509 for (int j = 0; j < op->peer->od->nbr->n_nat; j++) {
4510 const struct nbrec_nat *nat
4511 = op->peer->od->nbr->nat[j];
4512 if (!strcmp(nat->type, "dnat_and_snat")
4513 && nat->logical_port && nat->external_mac
4514 && eth_addr_from_string(nat->external_mac, &mac)) {
4515
4516 ds_clear(&match);
4517 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
4518 " && is_chassis_resident(\"%s\")",
4519 ETH_ADDR_ARGS(mac),
4520 nat->logical_port);
4521
4522 ds_clear(&actions);
4523 ds_put_format(&actions, "outport = %s; output;",
4524 op->json_key);
4525 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
4526 50, ds_cstr(&match),
4527 ds_cstr(&actions));
4528 }
4529 }
4530 }
4531 } else {
4532 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4533
4534 VLOG_INFO_RL(&rl,
4535 "%s: invalid syntax '%s' in addresses column",
4536 op->nbsp->name, op->nbsp->addresses[i]);
4537 }
4538 }
4539 }
4540
4541 /* Ingress table 16: Destination lookup for unknown MACs (priority 0). */
4542 HMAP_FOR_EACH (od, key_node, datapaths) {
4543 if (!od->nbs) {
4544 continue;
4545 }
4546
4547 if (od->has_unknown) {
4548 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
4549 "outport = \""MC_UNKNOWN"\"; output;");
4550 }
4551 }
4552
4553 /* Egress tables 8: Egress port security - IP (priority 0)
4554 * Egress table 9: Egress port security L2 - multicast/broadcast
4555 * (priority 100). */
4556 HMAP_FOR_EACH (od, key_node, datapaths) {
4557 if (!od->nbs) {
4558 continue;
4559 }
4560
4561 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
4562 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
4563 "output;");
4564 }
4565
4566 /* Egress table 8: Egress port security - IP (priorities 90 and 80)
4567 * if port security enabled.
4568 *
4569 * Egress table 9: Egress port security - L2 (priorities 50 and 150).
4570 *
4571 * Priority 50 rules implement port security for enabled logical port.
4572 *
4573 * Priority 150 rules drop packets to disabled logical ports, so that they
4574 * don't even receive multicast or broadcast packets. */
4575 HMAP_FOR_EACH (op, key_node, ports) {
4576 if (!op->nbsp) {
4577 continue;
4578 }
4579
4580 ds_clear(&match);
4581 ds_put_format(&match, "outport == %s", op->json_key);
4582 if (lsp_is_enabled(op->nbsp)) {
4583 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
4584 &match);
4585 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
4586 ds_cstr(&match), "output;");
4587 } else {
4588 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
4589 ds_cstr(&match), "drop;");
4590 }
4591
4592 if (op->nbsp->n_port_security) {
4593 build_port_security_ip(P_OUT, op, lflows);
4594 }
4595 }
4596
4597 ds_destroy(&match);
4598 ds_destroy(&actions);
4599 }
4600
4601 static bool
4602 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
4603 {
4604 return !lrport->enabled || *lrport->enabled;
4605 }
4606
4607 /* Returns a string of the IP address of the router port 'op' that
4608 * overlaps with 'ip_s". If one is not found, returns NULL.
4609 *
4610 * The caller must not free the returned string. */
4611 static const char *
4612 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
4613 {
4614 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4615
4616 if (is_ipv4) {
4617 ovs_be32 ip;
4618
4619 if (!ip_parse(ip_s, &ip)) {
4620 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4621 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
4622 return NULL;
4623 }
4624
4625 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4626 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
4627
4628 if (!((na->network ^ ip) & na->mask)) {
4629 /* There should be only 1 interface that matches the
4630 * supplied IP. Otherwise, it's a configuration error,
4631 * because subnets of a router's interfaces should NOT
4632 * overlap. */
4633 return na->addr_s;
4634 }
4635 }
4636 } else {
4637 struct in6_addr ip6;
4638
4639 if (!ipv6_parse(ip_s, &ip6)) {
4640 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4641 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
4642 return NULL;
4643 }
4644
4645 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4646 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
4647 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
4648 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
4649
4650 if (ipv6_is_zero(&and_addr)) {
4651 /* There should be only 1 interface that matches the
4652 * supplied IP. Otherwise, it's a configuration error,
4653 * because subnets of a router's interfaces should NOT
4654 * overlap. */
4655 return na->addr_s;
4656 }
4657 }
4658 }
4659
4660 return NULL;
4661 }
4662
4663 static void
4664 add_route(struct hmap *lflows, const struct ovn_port *op,
4665 const char *lrp_addr_s, const char *network_s, int plen,
4666 const char *gateway, const char *policy)
4667 {
4668 bool is_ipv4 = strchr(network_s, '.') ? true : false;
4669 struct ds match = DS_EMPTY_INITIALIZER;
4670 const char *dir;
4671 uint16_t priority;
4672
4673 if (policy && !strcmp(policy, "src-ip")) {
4674 dir = "src";
4675 priority = plen * 2;
4676 } else {
4677 dir = "dst";
4678 priority = (plen * 2) + 1;
4679 }
4680
4681 /* IPv6 link-local addresses must be scoped to the local router port. */
4682 if (!is_ipv4) {
4683 struct in6_addr network;
4684 ovs_assert(ipv6_parse(network_s, &network));
4685 if (in6_is_lla(&network)) {
4686 ds_put_format(&match, "inport == %s && ", op->json_key);
4687 }
4688 }
4689 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
4690 network_s, plen);
4691
4692 struct ds actions = DS_EMPTY_INITIALIZER;
4693 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
4694
4695 if (gateway) {
4696 ds_put_cstr(&actions, gateway);
4697 } else {
4698 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
4699 }
4700 ds_put_format(&actions, "; "
4701 "%sreg1 = %s; "
4702 "eth.src = %s; "
4703 "outport = %s; "
4704 "flags.loopback = 1; "
4705 "next;",
4706 is_ipv4 ? "" : "xx",
4707 lrp_addr_s,
4708 op->lrp_networks.ea_s,
4709 op->json_key);
4710
4711 /* The priority here is calculated to implement longest-prefix-match
4712 * routing. */
4713 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
4714 ds_cstr(&match), ds_cstr(&actions));
4715 ds_destroy(&match);
4716 ds_destroy(&actions);
4717 }
4718
4719 static void
4720 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
4721 struct hmap *ports,
4722 const struct nbrec_logical_router_static_route *route)
4723 {
4724 ovs_be32 nexthop;
4725 const char *lrp_addr_s = NULL;
4726 unsigned int plen;
4727 bool is_ipv4;
4728
4729 /* Verify that the next hop is an IP address with an all-ones mask. */
4730 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
4731 if (!error) {
4732 if (plen != 32) {
4733 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4734 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4735 return;
4736 }
4737 is_ipv4 = true;
4738 } else {
4739 free(error);
4740
4741 struct in6_addr ip6;
4742 error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
4743 if (!error) {
4744 if (plen != 128) {
4745 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4746 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4747 return;
4748 }
4749 is_ipv4 = false;
4750 } else {
4751 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4752 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
4753 free(error);
4754 return;
4755 }
4756 }
4757
4758 char *prefix_s;
4759 if (is_ipv4) {
4760 ovs_be32 prefix;
4761 /* Verify that ip prefix is a valid IPv4 address. */
4762 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
4763 if (error) {
4764 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4765 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4766 route->ip_prefix);
4767 free(error);
4768 return;
4769 }
4770 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
4771 } else {
4772 /* Verify that ip prefix is a valid IPv6 address. */
4773 struct in6_addr prefix;
4774 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
4775 if (error) {
4776 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4777 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4778 route->ip_prefix);
4779 free(error);
4780 return;
4781 }
4782 struct in6_addr mask = ipv6_create_mask(plen);
4783 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
4784 prefix_s = xmalloc(INET6_ADDRSTRLEN);
4785 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
4786 }
4787
4788 /* Find the outgoing port. */
4789 struct ovn_port *out_port = NULL;
4790 if (route->output_port) {
4791 out_port = ovn_port_find(ports, route->output_port);
4792 if (!out_port) {
4793 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4794 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
4795 route->output_port, route->ip_prefix);
4796 goto free_prefix_s;
4797 }
4798 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4799 if (!lrp_addr_s) {
4800 /* There are no IP networks configured on the router's port via
4801 * which 'route->nexthop' is theoretically reachable. But since
4802 * 'out_port' has been specified, we honor it by trying to reach
4803 * 'route->nexthop' via the first IP address of 'out_port'.
4804 * (There are cases, e.g in GCE, where each VM gets a /32 IP
4805 * address and the default gateway is still reachable from it.) */
4806 if (is_ipv4) {
4807 if (out_port->lrp_networks.n_ipv4_addrs) {
4808 lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
4809 }
4810 } else {
4811 if (out_port->lrp_networks.n_ipv6_addrs) {
4812 lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
4813 }
4814 }
4815 }
4816 } else {
4817 /* output_port is not specified, find the
4818 * router port matching the next hop. */
4819 int i;
4820 for (i = 0; i < od->nbr->n_ports; i++) {
4821 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
4822 out_port = ovn_port_find(ports, lrp->name);
4823 if (!out_port) {
4824 /* This should not happen. */
4825 continue;
4826 }
4827
4828 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4829 if (lrp_addr_s) {
4830 break;
4831 }
4832 }
4833 }
4834
4835 if (!out_port || !lrp_addr_s) {
4836 /* There is no matched out port. */
4837 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4838 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
4839 route->ip_prefix, route->nexthop);
4840 goto free_prefix_s;
4841 }
4842
4843 char *policy = route->policy ? route->policy : "dst-ip";
4844 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
4845 policy);
4846
4847 free_prefix_s:
4848 free(prefix_s);
4849 }
4850
4851 static void
4852 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4853 {
4854 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
4855 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
4856 return;
4857 }
4858
4859 ds_put_cstr(ds, "{");
4860 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4861 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
4862 if (add_bcast) {
4863 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4864 }
4865 }
4866 ds_chomp(ds, ' ');
4867 ds_chomp(ds, ',');
4868 ds_put_cstr(ds, "}");
4869 }
4870
4871 static void
4872 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4873 {
4874 if (op->lrp_networks.n_ipv6_addrs == 1) {
4875 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4876 return;
4877 }
4878
4879 ds_put_cstr(ds, "{");
4880 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4881 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4882 }
4883 ds_chomp(ds, ' ');
4884 ds_chomp(ds, ',');
4885 ds_put_cstr(ds, "}");
4886 }
4887
4888 static const char *
4889 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
4890 {
4891 char *key = xasprintf("%s_force_snat_ip", key_type);
4892 const char *ip_address = smap_get(&od->nbr->options, key);
4893 free(key);
4894
4895 if (ip_address) {
4896 ovs_be32 mask;
4897 char *error = ip_parse_masked(ip_address, ip, &mask);
4898 if (error || mask != OVS_BE32_MAX) {
4899 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4900 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
4901 ip_address, UUID_ARGS(&od->key));
4902 free(error);
4903 *ip = 0;
4904 return NULL;
4905 }
4906 return ip_address;
4907 }
4908
4909 *ip = 0;
4910 return NULL;
4911 }
4912
4913 static void
4914 add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
4915 struct ds *match, struct ds *actions, int priority,
4916 const char *lb_force_snat_ip, char *backend_ips,
4917 bool is_udp, int addr_family)
4918 {
4919 /* A match and actions for new connections. */
4920 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
4921 if (lb_force_snat_ip) {
4922 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
4923 ds_cstr(actions));
4924 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4925 new_actions);
4926 free(new_actions);
4927 } else {
4928 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4929 ds_cstr(actions));
4930 }
4931
4932 /* A match and actions for established connections. */
4933 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
4934 if (lb_force_snat_ip) {
4935 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4936 "flags.force_snat_for_lb = 1; ct_dnat;");
4937 } else {
4938 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4939 "ct_dnat;");
4940 }
4941
4942 free(new_match);
4943 free(est_match);
4944
4945 if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips) {
4946 return;
4947 }
4948
4949 /* Add logical flows to UNDNAT the load balanced reverse traffic in
4950 * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical
4951 * router has a gateway router port associated.
4952 */
4953 struct ds undnat_match = DS_EMPTY_INITIALIZER;
4954 if (addr_family == AF_INET) {
4955 ds_put_cstr(&undnat_match, "ip4 && (");
4956 } else {
4957 ds_put_cstr(&undnat_match, "ip6 && (");
4958 }
4959 char *start, *next, *ip_str;
4960 start = next = xstrdup(backend_ips);
4961 ip_str = strsep(&next, ",");
4962 bool backend_ips_found = false;
4963 while (ip_str && ip_str[0]) {
4964 char *ip_address = NULL;
4965 uint16_t port = 0;
4966 int addr_family_;
4967 ip_address_and_port_from_lb_key(ip_str, &ip_address, &port,
4968 &addr_family_);
4969 if (!ip_address) {
4970 break;
4971 }
4972
4973 if (addr_family_ == AF_INET) {
4974 ds_put_format(&undnat_match, "(ip4.src == %s", ip_address);
4975 } else {
4976 ds_put_format(&undnat_match, "(ip6.src == %s", ip_address);
4977 }
4978 free(ip_address);
4979 if (port) {
4980 ds_put_format(&undnat_match, " && %s.src == %d) || ",
4981 is_udp ? "udp" : "tcp", port);
4982 } else {
4983 ds_put_cstr(&undnat_match, ") || ");
4984 }
4985 ip_str = strsep(&next, ",");
4986 backend_ips_found = true;
4987 }
4988
4989 free(start);
4990 if (!backend_ips_found) {
4991 ds_destroy(&undnat_match);
4992 return;
4993 }
4994 ds_chomp(&undnat_match, ' ');
4995 ds_chomp(&undnat_match, '|');
4996 ds_chomp(&undnat_match, '|');
4997 ds_chomp(&undnat_match, ' ');
4998 ds_put_format(&undnat_match, ") && outport == %s && "
4999 "is_chassis_resident(%s)", od->l3dgw_port->json_key,
5000 od->l3redirect_port->json_key);
5001 if (lb_force_snat_ip) {
5002 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
5003 ds_cstr(&undnat_match),
5004 "flags.force_snat_for_lb = 1; ct_dnat;");
5005 } else {
5006 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
5007 ds_cstr(&undnat_match), "ct_dnat;");
5008 }
5009
5010 ds_destroy(&undnat_match);
5011 }
5012
/* Bounds for the "max_interval" key of a router port's ipv6_ra_configs.
 * copy_ra_to_sb() clamps the configured value into
 * [ND_RA_MAX_INTERVAL_MIN, ND_RA_MAX_INTERVAL_MAX].
 * NOTE(review): the values match the MaxRtrAdvInterval range of RFC 4861
 * section 6.2.1, so the unit is presumably seconds -- confirm. */
#define ND_RA_MAX_INTERVAL_MAX 1800
#define ND_RA_MAX_INTERVAL_MIN 4

/* Bounds for the "min_interval" key.  The upper bound is three quarters
 * (integer arithmetic) of the max interval passed as 'max';
 * copy_ra_to_sb() passes the already clamped max interval. */
#define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
#define ND_RA_MIN_INTERVAL_MIN 3
5018
5019 static void
5020 copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
5021 {
5022 struct smap options;
5023 smap_clone(&options, &op->sb->options);
5024
5025 smap_add(&options, "ipv6_ra_send_periodic", "true");
5026 smap_add(&options, "ipv6_ra_address_mode", address_mode);
5027
5028 int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
5029 "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
5030 if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
5031 max_interval = ND_RA_MAX_INTERVAL_MAX;
5032 }
5033 if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
5034 max_interval = ND_RA_MAX_INTERVAL_MIN;
5035 }
5036 smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval);
5037
5038 int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
5039 "min_interval", nd_ra_min_interval_default(max_interval));
5040 if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
5041 min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
5042 }
5043 if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
5044 min_interval = ND_RA_MIN_INTERVAL_MIN;
5045 }
5046 smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval);
5047
5048 int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", ND_MTU_DEFAULT);
5049 /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */
5050 if (mtu && mtu >= 1280) {
5051 smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
5052 }
5053
5054 struct ds s = DS_EMPTY_INITIALIZER;
5055 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
5056 struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
5057 if (in6_is_lla(&addrs->network)) {
5058 smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
5059 continue;
5060 }
5061 ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
5062 }
5063 /* Remove trailing space */
5064 ds_chomp(&s, ' ');
5065 smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
5066 ds_destroy(&s);
5067
5068 smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
5069
5070 sbrec_port_binding_set_options(op->sb, &options);
5071 smap_destroy(&options);
5072 }
5073
5074 static void
5075 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
5076 struct hmap *lflows)
5077 {
5078 /* This flow table structure is documented in ovn-northd(8), so please
5079 * update ovn-northd.8.xml if you change anything. */
5080
5081 struct ds match = DS_EMPTY_INITIALIZER;
5082 struct ds actions = DS_EMPTY_INITIALIZER;
5083
5084 /* Logical router ingress table 0: Admission control framework. */
5085 struct ovn_datapath *od;
5086 HMAP_FOR_EACH (od, key_node, datapaths) {
5087 if (!od->nbr) {
5088 continue;
5089 }
5090
5091 /* Logical VLANs not supported.
5092 * Broadcast/multicast source address is invalid. */
5093 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
5094 "vlan.present || eth.src[40]", "drop;");
5095 }
5096
5097 /* Logical router ingress table 0: match (priority 50). */
5098 struct ovn_port *op;
5099 HMAP_FOR_EACH (op, key_node, ports) {
5100 if (!op->nbrp) {
5101 continue;
5102 }
5103
5104 if (!lrport_is_enabled(op->nbrp)) {
5105 /* Drop packets from disabled logical ports (since logical flow
5106 * tables are default-drop). */
5107 continue;
5108 }
5109
5110 if (op->derived) {
5111 /* No ingress packets should be received on a chassisredirect
5112 * port. */
5113 continue;
5114 }
5115
5116 ds_clear(&match);
5117 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
5118 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
5119 ds_cstr(&match), "next;");
5120
5121 ds_clear(&match);
5122 ds_put_format(&match, "eth.dst == %s && inport == %s",
5123 op->lrp_networks.ea_s, op->json_key);
5124 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5125 && op->od->l3redirect_port) {
5126 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
5127 * should only be received on the "redirect-chassis". */
5128 ds_put_format(&match, " && is_chassis_resident(%s)",
5129 op->od->l3redirect_port->json_key);
5130 }
5131 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
5132 ds_cstr(&match), "next;");
5133 }
5134
5135 /* Logical router ingress table 1: IP Input. */
5136 HMAP_FOR_EACH (od, key_node, datapaths) {
5137 if (!od->nbr) {
5138 continue;
5139 }
5140
5141 /* L3 admission control: drop multicast and broadcast source, localhost
5142 * source or destination, and zero network source or destination
5143 * (priority 100). */
5144 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
5145 "ip4.mcast || "
5146 "ip4.src == 255.255.255.255 || "
5147 "ip4.src == 127.0.0.0/8 || "
5148 "ip4.dst == 127.0.0.0/8 || "
5149 "ip4.src == 0.0.0.0/8 || "
5150 "ip4.dst == 0.0.0.0/8",
5151 "drop;");
5152
5153 /* ARP reply handling. Use ARP replies to populate the logical
5154 * router's ARP table. */
5155 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
5156 "put_arp(inport, arp.spa, arp.sha);");
5157
5158 /* Drop Ethernet local broadcast. By definition this traffic should
5159 * not be forwarded.*/
5160 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
5161 "eth.bcast", "drop;");
5162
5163 /* TTL discard */
5164 ds_clear(&match);
5165 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
5166 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
5167 ds_cstr(&match), "drop;");
5168
5169 /* ND advertisement handling. Use advertisements to populate
5170 * the logical router's ARP/ND table. */
5171 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
5172 "put_nd(inport, nd.target, nd.tll);");
5173
5174 /* Lean from neighbor solicitations that were not directed at
5175 * us. (A priority-90 flow will respond to requests to us and
5176 * learn the sender's mac address. */
5177 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
5178 "put_nd(inport, ip6.src, nd.sll);");
5179
5180 /* Pass other traffic not already handled to the next table for
5181 * routing. */
5182 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
5183 }
5184
5185 /* Logical router ingress table 1: IP Input for IPv4. */
5186 HMAP_FOR_EACH (op, key_node, ports) {
5187 if (!op->nbrp) {
5188 continue;
5189 }
5190
5191 if (op->derived) {
5192 /* No ingress packets are accepted on a chassisredirect
5193 * port, so no need to program flows for that port. */
5194 continue;
5195 }
5196
5197 if (op->lrp_networks.n_ipv4_addrs) {
5198 /* L3 admission control: drop packets that originate from an
5199 * IPv4 address owned by the router or a broadcast address
5200 * known to the router (priority 100). */
5201 ds_clear(&match);
5202 ds_put_cstr(&match, "ip4.src == ");
5203 op_put_v4_networks(&match, op, true);
5204 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
5205 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5206 ds_cstr(&match), "drop;");
5207
5208 /* ICMP echo reply. These flows reply to ICMP echo requests
5209 * received for the router's IP address. Since packets only
5210 * get here as part of the logical router datapath, the inport
5211 * (i.e. the incoming locally attached net) does not matter.
5212 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
5213 ds_clear(&match);
5214 ds_put_cstr(&match, "ip4.dst == ");
5215 op_put_v4_networks(&match, op, false);
5216 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
5217
5218 ds_clear(&actions);
5219 ds_put_format(&actions,
5220 "ip4.dst <-> ip4.src; "
5221 "ip.ttl = 255; "
5222 "icmp4.type = 0; "
5223 "flags.loopback = 1; "
5224 "next; ");
5225 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5226 ds_cstr(&match), ds_cstr(&actions));
5227 }
5228
5229 /* ICMP time exceeded */
5230 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5231 ds_clear(&match);
5232 ds_clear(&actions);
5233
5234 ds_put_format(&match,
5235 "inport == %s && ip4 && "
5236 "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
5237 ds_put_format(&actions,
5238 "icmp4 {"
5239 "eth.dst <-> eth.src; "
5240 "icmp4.type = 11; /* Time exceeded */ "
5241 "icmp4.code = 0; /* TTL exceeded in transit */ "
5242 "ip4.dst = ip4.src; "
5243 "ip4.src = %s; "
5244 "ip.ttl = 255; "
5245 "next; };",
5246 op->lrp_networks.ipv4_addrs[i].addr_s);
5247 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5248 ds_cstr(&match), ds_cstr(&actions));
5249 }
5250
5251 /* ARP reply. These flows reply to ARP requests for the router's own
5252 * IP address. */
5253 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5254 ds_clear(&match);
5255 ds_put_format(&match,
5256 "inport == %s && arp.spa == %s/%u && arp.tpa == %s"
5257 " && arp.op == 1",
5258 op->json_key,
5259 op->lrp_networks.ipv4_addrs[i].network_s,
5260 op->lrp_networks.ipv4_addrs[i].plen,
5261 op->lrp_networks.ipv4_addrs[i].addr_s);
5262
5263 if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
5264 && op->peer->od->localnet_port) {
5265 bool add_chassis_resident_check = false;
5266 if (op == op->od->l3dgw_port) {
5267 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5268 * should only be sent from the "redirect-chassis", so that
5269 * upstream MAC learning points to the "redirect-chassis".
5270 * Also need to avoid generation of multiple ARP responses
5271 * from different chassis. */
5272 add_chassis_resident_check = true;
5273 } else {
5274 /* Check if the option 'reside-on-redirect-chassis'
5275 * is set to true on the router port. If set to true
5276 * and if peer's logical switch has a localnet port, it
5277 * means the router pipeline for the packets from
5278 * peer's logical switch is be run on the chassis
5279 * hosting the gateway port and it should reply to the
5280 * ARP requests for the router port IPs.
5281 */
5282 add_chassis_resident_check = smap_get_bool(
5283 &op->nbrp->options,
5284 "reside-on-redirect-chassis", false);
5285 }
5286
5287 if (add_chassis_resident_check) {
5288 ds_put_format(&match, " && is_chassis_resident(%s)",
5289 op->od->l3redirect_port->json_key);
5290 }
5291 }
5292
5293 ds_clear(&actions);
5294 ds_put_format(&actions,
5295 "put_arp(inport, arp.spa, arp.sha); "
5296 "eth.dst = eth.src; "
5297 "eth.src = %s; "
5298 "arp.op = 2; /* ARP reply */ "
5299 "arp.tha = arp.sha; "
5300 "arp.sha = %s; "
5301 "arp.tpa = arp.spa; "
5302 "arp.spa = %s; "
5303 "outport = %s; "
5304 "flags.loopback = 1; "
5305 "output;",
5306 op->lrp_networks.ea_s,
5307 op->lrp_networks.ea_s,
5308 op->lrp_networks.ipv4_addrs[i].addr_s,
5309 op->json_key);
5310 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5311 ds_cstr(&match), ds_cstr(&actions));
5312 }
5313
5314 /* Learn from ARP requests that were not directed at us. A typical
5315 * use case is GARP request handling. (A priority-90 flow will
5316 * respond to request to us and learn the sender's mac address.) */
5317 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5318 ds_clear(&match);
5319 ds_put_format(&match,
5320 "inport == %s && arp.spa == %s/%u && arp.op == 1",
5321 op->json_key,
5322 op->lrp_networks.ipv4_addrs[i].network_s,
5323 op->lrp_networks.ipv4_addrs[i].plen);
5324 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5325 && op->od->l3redirect_port) {
5326 ds_put_format(&match, " && is_chassis_resident(%s)",
5327 op->od->l3redirect_port->json_key);
5328 }
5329 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5330 ds_cstr(&match),
5331 "put_arp(inport, arp.spa, arp.sha);");
5332
5333 }
5334
5335 /* A set to hold all load-balancer vips that need ARP responses. */
5336 struct sset all_ips = SSET_INITIALIZER(&all_ips);
5337 int addr_family;
5338 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
5339
5340 const char *ip_address;
5341 SSET_FOR_EACH(ip_address, &all_ips) {
5342 ds_clear(&match);
5343 if (addr_family == AF_INET) {
5344 ds_put_format(&match,
5345 "inport == %s && arp.tpa == %s && arp.op == 1",
5346 op->json_key, ip_address);
5347 } else {
5348 ds_put_format(&match,
5349 "inport == %s && nd_ns && nd.target == %s",
5350 op->json_key, ip_address);
5351 }
5352
5353 ds_clear(&actions);
5354 if (addr_family == AF_INET) {
5355 ds_put_format(&actions,
5356 "eth.dst = eth.src; "
5357 "eth.src = %s; "
5358 "arp.op = 2; /* ARP reply */ "
5359 "arp.tha = arp.sha; "
5360 "arp.sha = %s; "
5361 "arp.tpa = arp.spa; "
5362 "arp.spa = %s; "
5363 "outport = %s; "
5364 "flags.loopback = 1; "
5365 "output;",
5366 op->lrp_networks.ea_s,
5367 op->lrp_networks.ea_s,
5368 ip_address,
5369 op->json_key);
5370 } else {
5371 ds_put_format(&actions,
5372 "nd_na { "
5373 "eth.src = %s; "
5374 "ip6.src = %s; "
5375 "nd.target = %s; "
5376 "nd.tll = %s; "
5377 "outport = inport; "
5378 "flags.loopback = 1; "
5379 "output; "
5380 "};",
5381 op->lrp_networks.ea_s,
5382 ip_address,
5383 ip_address,
5384 op->lrp_networks.ea_s);
5385 }
5386 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5387 ds_cstr(&match), ds_cstr(&actions));
5388 }
5389
5390 sset_destroy(&all_ips);
5391
5392 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
5393 * LBed traffic respectively to be SNATed. In addition, there can be
5394 * a number of SNAT rules in the NAT table. */
5395 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
5396 (op->od->nbr->n_nat + 2));
5397 size_t n_snat_ips = 0;
5398
5399 ovs_be32 snat_ip;
5400 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
5401 &snat_ip);
5402 if (dnat_force_snat_ip) {
5403 snat_ips[n_snat_ips++] = snat_ip;
5404 }
5405
5406 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
5407 &snat_ip);
5408 if (lb_force_snat_ip) {
5409 snat_ips[n_snat_ips++] = snat_ip;
5410 }
5411
5412 for (int i = 0; i < op->od->nbr->n_nat; i++) {
5413 const struct nbrec_nat *nat;
5414
5415 nat = op->od->nbr->nat[i];
5416
5417 ovs_be32 ip;
5418 if (!ip_parse(nat->external_ip, &ip) || !ip) {
5419 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5420 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
5421 "for router %s", nat->external_ip, op->key);
5422 continue;
5423 }
5424
5425 if (!strcmp(nat->type, "snat")) {
5426 snat_ips[n_snat_ips++] = ip;
5427 continue;
5428 }
5429
5430 /* ARP handling for external IP addresses.
5431 *
5432 * DNAT IP addresses are external IP addresses that need ARP
5433 * handling. */
5434 ds_clear(&match);
5435 ds_put_format(&match,
5436 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
5437 op->json_key, IP_ARGS(ip));
5438
5439 ds_clear(&actions);
5440 ds_put_format(&actions,
5441 "eth.dst = eth.src; "
5442 "arp.op = 2; /* ARP reply */ "
5443 "arp.tha = arp.sha; ");
5444
5445 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
5446 struct eth_addr mac;
5447 if (nat->external_mac &&
5448 eth_addr_from_string(nat->external_mac, &mac)
5449 && nat->logical_port) {
5450 /* distributed NAT case, use nat->external_mac */
5451 ds_put_format(&actions,
5452 "eth.src = "ETH_ADDR_FMT"; "
5453 "arp.sha = "ETH_ADDR_FMT"; ",
5454 ETH_ADDR_ARGS(mac),
5455 ETH_ADDR_ARGS(mac));
5456 /* Traffic with eth.src = nat->external_mac should only be
5457 * sent from the chassis where nat->logical_port is
5458 * resident, so that upstream MAC learning points to the
5459 * correct chassis. Also need to avoid generation of
5460 * multiple ARP responses from different chassis. */
5461 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
5462 nat->logical_port);
5463 } else {
5464 ds_put_format(&actions,
5465 "eth.src = %s; "
5466 "arp.sha = %s; ",
5467 op->lrp_networks.ea_s,
5468 op->lrp_networks.ea_s);
5469 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5470 * should only be sent from the "redirect-chassis", so that
5471 * upstream MAC learning points to the "redirect-chassis".
5472 * Also need to avoid generation of multiple ARP responses
5473 * from different chassis. */
5474 if (op->od->l3redirect_port) {
5475 ds_put_format(&match, " && is_chassis_resident(%s)",
5476 op->od->l3redirect_port->json_key);
5477 }
5478 }
5479 } else {
5480 ds_put_format(&actions,
5481 "eth.src = %s; "
5482 "arp.sha = %s; ",
5483 op->lrp_networks.ea_s,
5484 op->lrp_networks.ea_s);
5485 }
5486 ds_put_format(&actions,
5487 "arp.tpa = arp.spa; "
5488 "arp.spa = "IP_FMT"; "
5489 "outport = %s; "
5490 "flags.loopback = 1; "
5491 "output;",
5492 IP_ARGS(ip),
5493 op->json_key);
5494 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5495 ds_cstr(&match), ds_cstr(&actions));
5496 }
5497
5498 if (!smap_get(&op->od->nbr->options, "chassis")
5499 && !op->od->l3dgw_port) {
5500 /* UDP/TCP port unreachable. */
5501 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5502 ds_clear(&match);
5503 ds_put_format(&match,
5504 "ip4 && ip4.dst == %s && !ip.later_frag && udp",
5505 op->lrp_networks.ipv4_addrs[i].addr_s);
5506 const char *action = "icmp4 {"
5507 "eth.dst <-> eth.src; "
5508 "ip4.dst <-> ip4.src; "
5509 "ip.ttl = 255; "
5510 "icmp4.type = 3; "
5511 "icmp4.code = 3; "
5512 "next; };";
5513 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5514 ds_cstr(&match), action);
5515
5516 ds_clear(&match);
5517 ds_put_format(&match,
5518 "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
5519 op->lrp_networks.ipv4_addrs[i].addr_s);
5520 action = "tcp_reset {"
5521 "eth.dst <-> eth.src; "
5522 "ip4.dst <-> ip4.src; "
5523 "next; };";
5524 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5525 ds_cstr(&match), action);
5526
5527 ds_clear(&match);
5528 ds_put_format(&match,
5529 "ip4 && ip4.dst == %s && !ip.later_frag",
5530 op->lrp_networks.ipv4_addrs[i].addr_s);
5531 action = "icmp4 {"
5532 "eth.dst <-> eth.src; "
5533 "ip4.dst <-> ip4.src; "
5534 "ip.ttl = 255; "
5535 "icmp4.type = 3; "
5536 "icmp4.code = 2; "
5537 "next; };";
5538 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5539 ds_cstr(&match), action);
5540 }
5541 }
5542
5543 ds_clear(&match);
5544 ds_put_cstr(&match, "ip4.dst == {");
5545 bool has_drop_ips = false;
5546 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5547 bool snat_ip_is_router_ip = false;
5548 for (int j = 0; j < n_snat_ips; j++) {
5549 /* Packets to SNAT IPs should not be dropped. */
5550 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
5551 snat_ip_is_router_ip = true;
5552 break;
5553 }
5554 }
5555 if (snat_ip_is_router_ip) {
5556 continue;
5557 }
5558 ds_put_format(&match, "%s, ",
5559 op->lrp_networks.ipv4_addrs[i].addr_s);
5560 has_drop_ips = true;
5561 }
5562 ds_chomp(&match, ' ');
5563 ds_chomp(&match, ',');
5564 ds_put_cstr(&match, "}");
5565
5566 if (has_drop_ips) {
5567 /* Drop IP traffic to this router. */
5568 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5569 ds_cstr(&match), "drop;");
5570 }
5571
5572 free(snat_ips);
5573 }
5574
5575 /* Logical router ingress table 1: IP Input for IPv6. */
5576 HMAP_FOR_EACH (op, key_node, ports) {
5577 if (!op->nbrp) {
5578 continue;
5579 }
5580
5581 if (op->derived) {
5582 /* No ingress packets are accepted on a chassisredirect
5583 * port, so no need to program flows for that port. */
5584 continue;
5585 }
5586
5587 if (op->lrp_networks.n_ipv6_addrs) {
5588 /* L3 admission control: drop packets that originate from an
5589 * IPv6 address owned by the router (priority 100). */
5590 ds_clear(&match);
5591 ds_put_cstr(&match, "ip6.src == ");
5592 op_put_v6_networks(&match, op);
5593 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5594 ds_cstr(&match), "drop;");
5595
5596 /* ICMPv6 echo reply. These flows reply to echo requests
5597 * received for the router's IP address. */
5598 ds_clear(&match);
5599 ds_put_cstr(&match, "ip6.dst == ");
5600 op_put_v6_networks(&match, op);
5601 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
5602
5603 ds_clear(&actions);
5604 ds_put_cstr(&actions,
5605 "ip6.dst <-> ip6.src; "
5606 "ip.ttl = 255; "
5607 "icmp6.type = 129; "
5608 "flags.loopback = 1; "
5609 "next; ");
5610 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5611 ds_cstr(&match), ds_cstr(&actions));
5612
5613 /* Drop IPv6 traffic to this router. */
5614 ds_clear(&match);
5615 ds_put_cstr(&match, "ip6.dst == ");
5616 op_put_v6_networks(&match, op);
5617 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5618 ds_cstr(&match), "drop;");
5619 }
5620
5621 /* ND reply. These flows reply to ND solicitations for the
5622 * router's own IP address. */
5623 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5624 ds_clear(&match);
5625 ds_put_format(&match,
5626 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
5627 "&& nd.target == %s",
5628 op->json_key,
5629 op->lrp_networks.ipv6_addrs[i].addr_s,
5630 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
5631 op->lrp_networks.ipv6_addrs[i].addr_s);
5632 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5633 && op->od->l3redirect_port) {
5634 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5635 * should only be sent from the "redirect-chassis", so that
5636 * upstream MAC learning points to the "redirect-chassis".
5637 * Also need to avoid generation of multiple ND replies
5638 * from different chassis. */
5639 ds_put_format(&match, " && is_chassis_resident(%s)",
5640 op->od->l3redirect_port->json_key);
5641 }
5642
5643 ds_clear(&actions);
5644 ds_put_format(&actions,
5645 "put_nd(inport, ip6.src, nd.sll); "
5646 "nd_na_router { "
5647 "eth.src = %s; "
5648 "ip6.src = %s; "
5649 "nd.target = %s; "
5650 "nd.tll = %s; "
5651 "outport = inport; "
5652 "flags.loopback = 1; "
5653 "output; "
5654 "};",
5655 op->lrp_networks.ea_s,
5656 op->lrp_networks.ipv6_addrs[i].addr_s,
5657 op->lrp_networks.ipv6_addrs[i].addr_s,
5658 op->lrp_networks.ea_s);
5659 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5660 ds_cstr(&match), ds_cstr(&actions));
5661 }
5662
5663 /* UDP/TCP port unreachable */
5664 if (!smap_get(&op->od->nbr->options, "chassis")
5665 && !op->od->l3dgw_port) {
5666 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5667 ds_clear(&match);
5668 ds_put_format(&match,
5669 "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
5670 op->lrp_networks.ipv6_addrs[i].addr_s);
5671 const char *action = "tcp_reset {"
5672 "eth.dst <-> eth.src; "
5673 "ip6.dst <-> ip6.src; "
5674 "next; };";
5675 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5676 ds_cstr(&match), action);
5677
5678 ds_clear(&match);
5679 ds_put_format(&match,
5680 "ip6 && ip6.dst == %s && !ip.later_frag && udp",
5681 op->lrp_networks.ipv6_addrs[i].addr_s);
5682 action = "icmp6 {"
5683 "eth.dst <-> eth.src; "
5684 "ip6.dst <-> ip6.src; "
5685 "ip.ttl = 255; "
5686 "icmp6.type = 1; "
5687 "icmp6.code = 4; "
5688 "next; };";
5689 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5690 ds_cstr(&match), action);
5691
5692 ds_clear(&match);
5693 ds_put_format(&match,
5694 "ip6 && ip6.dst == %s && !ip.later_frag",
5695 op->lrp_networks.ipv6_addrs[i].addr_s);
5696 action = "icmp6 {"
5697 "eth.dst <-> eth.src; "
5698 "ip6.dst <-> ip6.src; "
5699 "ip.ttl = 255; "
5700 "icmp6.type = 1; "
5701 "icmp6.code = 3; "
5702 "next; };";
5703 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5704 ds_cstr(&match), action);
5705 }
5706 }
5707
5708 /* ICMPv6 time exceeded */
5709 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5710 /* skip link-local address */
5711 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5712 continue;
5713 }
5714
5715 ds_clear(&match);
5716 ds_clear(&actions);
5717
5718 ds_put_format(&match,
5719 "inport == %s && ip6 && "
5720 "ip6.src == %s/%d && "
5721 "ip.ttl == {0, 1} && !ip.later_frag",
5722 op->json_key,
5723 op->lrp_networks.ipv6_addrs[i].network_s,
5724 op->lrp_networks.ipv6_addrs[i].plen);
5725 ds_put_format(&actions,
5726 "icmp6 {"
5727 "eth.dst <-> eth.src; "
5728 "ip6.dst = ip6.src; "
5729 "ip6.src = %s; "
5730 "ip.ttl = 255; "
5731 "icmp6.type = 3; /* Time exceeded */ "
5732 "icmp6.code = 0; /* TTL exceeded in transit */ "
5733 "next; };",
5734 op->lrp_networks.ipv6_addrs[i].addr_s);
5735 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5736 ds_cstr(&match), ds_cstr(&actions));
5737 }
5738 }
5739
5740 /* NAT, Defrag and load balancing. */
5741 HMAP_FOR_EACH (od, key_node, datapaths) {
5742 if (!od->nbr) {
5743 continue;
5744 }
5745
5746 /* Packets are allowed by default. */
5747 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
5748 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
5749 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
5750 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
5751 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
5752 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
5753
5754 /* NAT rules are only valid on Gateway routers and routers with
5755 * l3dgw_port (router has a port with "redirect-chassis"
5756 * specified). */
5757 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
5758 continue;
5759 }
5760
5761 ovs_be32 snat_ip;
5762 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
5763 &snat_ip);
5764 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
5765 &snat_ip);
5766
5767 for (int i = 0; i < od->nbr->n_nat; i++) {
5768 const struct nbrec_nat *nat;
5769
5770 nat = od->nbr->nat[i];
5771
5772 ovs_be32 ip, mask;
5773
5774 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
5775 if (error || mask != OVS_BE32_MAX) {
5776 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5777 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
5778 nat->external_ip);
5779 free(error);
5780 continue;
5781 }
5782
5783 /* Check the validity of nat->logical_ip. 'logical_ip' can
5784 * be a subnet when the type is "snat". */
5785 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
5786 if (!strcmp(nat->type, "snat")) {
5787 if (error) {
5788 static struct vlog_rate_limit rl =
5789 VLOG_RATE_LIMIT_INIT(5, 1);
5790 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
5791 "in router "UUID_FMT"",
5792 nat->logical_ip, UUID_ARGS(&od->key));
5793 free(error);
5794 continue;
5795 }
5796 } else {
5797 if (error || mask != OVS_BE32_MAX) {
5798 static struct vlog_rate_limit rl =
5799 VLOG_RATE_LIMIT_INIT(5, 1);
5800 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
5801 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
5802 free(error);
5803 continue;
5804 }
5805 }
5806
5807 /* For distributed router NAT, determine whether this NAT rule
5808 * satisfies the conditions for distributed NAT processing. */
5809 bool distributed = false;
5810 struct eth_addr mac;
5811 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
5812 nat->logical_port && nat->external_mac) {
5813 if (eth_addr_from_string(nat->external_mac, &mac)) {
5814 distributed = true;
5815 } else {
5816 static struct vlog_rate_limit rl =
5817 VLOG_RATE_LIMIT_INIT(5, 1);
5818 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
5819 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
5820 continue;
5821 }
5822 }
5823
5824 /* Ingress UNSNAT table: It is for already established connections'
5825 * reverse traffic. i.e., SNAT has already been done in egress
5826 * pipeline and now the packet has entered the ingress pipeline as
5827 * part of a reply. We undo the SNAT here.
5828 *
5829 * Undoing SNAT has to happen before DNAT processing. This is
5830 * because when the packet was DNATed in ingress pipeline, it did
5831 * not know about the possibility of eventual additional SNAT in
5832 * egress pipeline. */
5833 if (!strcmp(nat->type, "snat")
5834 || !strcmp(nat->type, "dnat_and_snat")) {
5835 if (!od->l3dgw_port) {
5836 /* Gateway router. */
5837 ds_clear(&match);
5838 ds_put_format(&match, "ip && ip4.dst == %s",
5839 nat->external_ip);
5840 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
5841 ds_cstr(&match), "ct_snat;");
5842 } else {
5843 /* Distributed router. */
5844
5845 /* Traffic received on l3dgw_port is subject to NAT. */
5846 ds_clear(&match);
5847 ds_put_format(&match, "ip && ip4.dst == %s"
5848 " && inport == %s",
5849 nat->external_ip,
5850 od->l3dgw_port->json_key);
5851 if (!distributed && od->l3redirect_port) {
5852 /* Flows for NAT rules that are centralized are only
5853 * programmed on the "redirect-chassis". */
5854 ds_put_format(&match, " && is_chassis_resident(%s)",
5855 od->l3redirect_port->json_key);
5856 }
5857 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5858 ds_cstr(&match), "ct_snat;");
5859
5860 /* Traffic received on other router ports must be
5861 * redirected to the central instance of the l3dgw_port
5862 * for NAT processing. */
5863 ds_clear(&match);
5864 ds_put_format(&match, "ip && ip4.dst == %s",
5865 nat->external_ip);
5866 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
5867 ds_cstr(&match),
5868 REGBIT_NAT_REDIRECT" = 1; next;");
5869 }
5870 }
5871
5872 /* Ingress DNAT table: Packets enter the pipeline with destination
5873 * IP address that needs to be DNATted from an external IP address
5874 * to a logical IP address. */
5875 if (!strcmp(nat->type, "dnat")
5876 || !strcmp(nat->type, "dnat_and_snat")) {
5877 if (!od->l3dgw_port) {
5878 /* Gateway router. */
5879 /* Packet when it goes from the initiator to destination.
5880 * We need to set flags.loopback because the router can
5881 * send the packet back through the same interface. */
5882 ds_clear(&match);
5883 ds_put_format(&match, "ip && ip4.dst == %s",
5884 nat->external_ip);
5885 ds_clear(&actions);
5886 if (dnat_force_snat_ip) {
5887 /* Indicate to the future tables that a DNAT has taken
5888 * place and a force SNAT needs to be done in the
5889 * Egress SNAT table. */
5890 ds_put_format(&actions,
5891 "flags.force_snat_for_dnat = 1; ");
5892 }
5893 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
5894 nat->logical_ip);
5895 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5896 ds_cstr(&match), ds_cstr(&actions));
5897 } else {
5898 /* Distributed router. */
5899
5900 /* Traffic received on l3dgw_port is subject to NAT. */
5901 ds_clear(&match);
5902 ds_put_format(&match, "ip && ip4.dst == %s"
5903 " && inport == %s",
5904 nat->external_ip,
5905 od->l3dgw_port->json_key);
5906 if (!distributed && od->l3redirect_port) {
5907 /* Flows for NAT rules that are centralized are only
5908 * programmed on the "redirect-chassis". */
5909 ds_put_format(&match, " && is_chassis_resident(%s)",
5910 od->l3redirect_port->json_key);
5911 }
5912 ds_clear(&actions);
5913 ds_put_format(&actions, "ct_dnat(%s);",
5914 nat->logical_ip);
5915 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5916 ds_cstr(&match), ds_cstr(&actions));
5917
5918 /* Traffic received on other router ports must be
5919 * redirected to the central instance of the l3dgw_port
5920 * for NAT processing. */
5921 ds_clear(&match);
5922 ds_put_format(&match, "ip && ip4.dst == %s",
5923 nat->external_ip);
5924 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5925 ds_cstr(&match),
5926 REGBIT_NAT_REDIRECT" = 1; next;");
5927 }
5928 }
5929
5930 /* Egress UNDNAT table: It is for already established connections'
5931 * reverse traffic. i.e., DNAT has already been done in ingress
5932 * pipeline and now the packet has entered the egress pipeline as
5933 * part of a reply. We undo the DNAT here.
5934 *
5935 * Note that this only applies for NAT on a distributed router.
5936 * Undo DNAT on a gateway router is done in the ingress DNAT
5937 * pipeline stage. */
5938 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
5939 || !strcmp(nat->type, "dnat_and_snat"))) {
5940 ds_clear(&match);
5941 ds_put_format(&match, "ip && ip4.src == %s"
5942 " && outport == %s",
5943 nat->logical_ip,
5944 od->l3dgw_port->json_key);
5945 if (!distributed && od->l3redirect_port) {
5946 /* Flows for NAT rules that are centralized are only
5947 * programmed on the "redirect-chassis". */
5948 ds_put_format(&match, " && is_chassis_resident(%s)",
5949 od->l3redirect_port->json_key);
5950 }
5951 ds_clear(&actions);
5952 if (distributed) {
5953 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5954 ETH_ADDR_ARGS(mac));
5955 }
5956 ds_put_format(&actions, "ct_dnat;");
5957 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
5958 ds_cstr(&match), ds_cstr(&actions));
5959 }
5960
5961 /* Egress SNAT table: Packets enter the egress pipeline with
5962 * source ip address that needs to be SNATted to an external ip
5963 * address. */
5964 if (!strcmp(nat->type, "snat")
5965 || !strcmp(nat->type, "dnat_and_snat")) {
5966 if (!od->l3dgw_port) {
5967 /* Gateway router. */
5968 ds_clear(&match);
5969 ds_put_format(&match, "ip && ip4.src == %s",
5970 nat->logical_ip);
5971 ds_clear(&actions);
5972 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5973
5974 /* The priority here is calculated such that the
5975 * nat->logical_ip with the longest mask gets a higher
5976 * priority. */
5977 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5978 count_1bits(ntohl(mask)) + 1,
5979 ds_cstr(&match), ds_cstr(&actions));
5980 } else {
5981 /* Distributed router. */
5982 ds_clear(&match);
5983 ds_put_format(&match, "ip && ip4.src == %s"
5984 " && outport == %s",
5985 nat->logical_ip,
5986 od->l3dgw_port->json_key);
5987 if (!distributed && od->l3redirect_port) {
5988 /* Flows for NAT rules that are centralized are only
5989 * programmed on the "redirect-chassis". */
5990 ds_put_format(&match, " && is_chassis_resident(%s)",
5991 od->l3redirect_port->json_key);
5992 }
5993 ds_clear(&actions);
5994 if (distributed) {
5995 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5996 ETH_ADDR_ARGS(mac));
5997 }
5998 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5999
6000 /* The priority here is calculated such that the
6001 * nat->logical_ip with the longest mask gets a higher
6002 * priority. */
6003 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
6004 count_1bits(ntohl(mask)) + 1,
6005 ds_cstr(&match), ds_cstr(&actions));
6006 }
6007 }
6008
6009 /* Logical router ingress table 0:
6010 * For NAT on a distributed router, add rules allowing
6011 * ingress traffic with eth.dst matching nat->external_mac
6012 * on the l3dgw_port instance where nat->logical_port is
6013 * resident. */
6014 if (distributed) {
6015 ds_clear(&match);
6016 ds_put_format(&match,
6017 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
6018 " && is_chassis_resident(\"%s\")",
6019 ETH_ADDR_ARGS(mac),
6020 od->l3dgw_port->json_key,
6021 nat->logical_port);
6022 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
6023 ds_cstr(&match), "next;");
6024 }
6025
6026 /* Ingress Gateway Redirect Table: For NAT on a distributed
6027 * router, add flows that are specific to a NAT rule. These
6028 * flows indicate the presence of an applicable NAT rule that
6029 * can be applied in a distributed manner. */
6030 if (distributed) {
6031 ds_clear(&match);
6032 ds_put_format(&match, "ip4.src == %s && outport == %s",
6033 nat->logical_ip,
6034 od->l3dgw_port->json_key);
6035 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
6036 ds_cstr(&match), "next;");
6037 }
6038
6039 /* Egress Loopback table: For NAT on a distributed router.
6040 * If packets in the egress pipeline on the distributed
6041 * gateway port have ip.dst matching a NAT external IP, then
6042 * loop a clone of the packet back to the beginning of the
6043 * ingress pipeline with inport = outport. */
6044 if (od->l3dgw_port) {
6045 /* Distributed router. */
6046 ds_clear(&match);
6047 ds_put_format(&match, "ip4.dst == %s && outport == %s",
6048 nat->external_ip,
6049 od->l3dgw_port->json_key);
6050 ds_clear(&actions);
6051 ds_put_format(&actions,
6052 "clone { ct_clear; "
6053 "inport = outport; outport = \"\"; "
6054 "flags = 0; flags.loopback = 1; ");
6055 for (int j = 0; j < MFF_N_LOG_REGS; j++) {
6056 ds_put_format(&actions, "reg%d = 0; ", j);
6057 }
6058 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
6059 "next(pipeline=ingress, table=0); };");
6060 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
6061 ds_cstr(&match), ds_cstr(&actions));
6062 }
6063 }
6064
6065 /* Handle force SNAT options set in the gateway router. */
6066 if (dnat_force_snat_ip && !od->l3dgw_port) {
6067 /* If a packet with destination IP address as that of the
6068 * gateway router (as set in options:dnat_force_snat_ip) is seen,
6069 * UNSNAT it. */
6070 ds_clear(&match);
6071 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
6072 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
6073 ds_cstr(&match), "ct_snat;");
6074
6075 /* Higher priority rules to force SNAT with the IP addresses
6076 * configured in the Gateway router. This only takes effect
6077 * when the packet has already been DNATed once. */
6078 ds_clear(&match);
6079 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
6080 ds_clear(&actions);
6081 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
6082 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
6083 ds_cstr(&match), ds_cstr(&actions));
6084 }
6085 if (lb_force_snat_ip && !od->l3dgw_port) {
6086 /* If a packet with destination IP address as that of the
6087 * gateway router (as set in options:lb_force_snat_ip) is seen,
6088 * UNSNAT it. */
6089 ds_clear(&match);
6090 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
6091 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
6092 ds_cstr(&match), "ct_snat;");
6093
6094 /* Load balanced traffic will have flags.force_snat_for_lb set.
6095 * Force SNAT it. */
6096 ds_clear(&match);
6097 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
6098 ds_clear(&actions);
6099 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
6100 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
6101 ds_cstr(&match), ds_cstr(&actions));
6102 }
6103
6104 if (!od->l3dgw_port) {
6105 /* For gateway router, re-circulate every packet through
6106 * the DNAT zone. This helps with the following.
6107 *
6108 * Any packet that needs to be unDNATed in the reverse
6109 * direction gets unDNATed. Ideally this could be done in
6110 * the egress pipeline. But since the gateway router
6111 * does not have any feature that depends on the source
6112 * ip address being external IP address for IP routing,
6113 * we can do it here, saving a future re-circulation. */
6114 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
6115 "ip", "flags.loopback = 1; ct_dnat;");
6116 } else {
6117 /* For NAT on a distributed router, add flows to Ingress
6118 * IP Routing table, Ingress ARP Resolution table, and
6119 * Ingress Gateway Redirect Table that are not specific to a
6120 * NAT rule. */
6121
6122 /* The highest priority IN_IP_ROUTING rule matches packets
6123 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
6124 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
6125 * will take care of setting the outport. */
6126 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
6127 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
6128
6129 /* The highest priority IN_ARP_RESOLVE rule matches packets
6130 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
6131 * then sets eth.dst to the distributed gateway port's
6132 * ethernet address. */
6133 ds_clear(&actions);
6134 ds_put_format(&actions, "eth.dst = %s; next;",
6135 od->l3dgw_port->lrp_networks.ea_s);
6136 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
6137 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
6138
6139 /* The highest priority IN_GW_REDIRECT rule redirects packets
6140 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
6141 * the central instance of the l3dgw_port for NAT processing. */
6142 ds_clear(&actions);
6143 ds_put_format(&actions, "outport = %s; next;",
6144 od->l3redirect_port->json_key);
6145 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
6146 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
6147 }
6148
6149 /* Load balancing and packet defrag are only valid on
6150 * Gateway routers or router with gateway port. */
6151 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
6152 continue;
6153 }
6154
6155 /* A set to hold all ips that need defragmentation and tracking. */
6156 struct sset all_ips = SSET_INITIALIZER(&all_ips);
6157
6158 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
6159 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
6160 struct smap *vips = &lb->vips;
6161 struct smap_node *node;
6162
6163 SMAP_FOR_EACH (node, vips) {
6164 uint16_t port = 0;
6165 int addr_family;
6166
6167 /* node->key contains IP:port or just IP. */
6168 char *ip_address = NULL;
6169 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
6170 &addr_family);
6171 if (!ip_address) {
6172 continue;
6173 }
6174
6175 if (!sset_contains(&all_ips, ip_address)) {
6176 sset_add(&all_ips, ip_address);
6177 /* If there are any load balancing rules, we should send
6178 * the packet to conntrack for defragmentation and
6179 * tracking. This helps with two things.
6180 *
6181 * 1. With tracking, we can send only new connections to
6182 * pick a DNAT ip address from a group.
6183 * 2. If there are L4 ports in load balancing rules, we
6184 * need the defragmentation to match on L4 ports. */
6185 ds_clear(&match);
6186 if (addr_family == AF_INET) {
6187 ds_put_format(&match, "ip && ip4.dst == %s",
6188 ip_address);
6189 } else {
6190 ds_put_format(&match, "ip && ip6.dst == %s",
6191 ip_address);
6192 }
6193 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
6194 100, ds_cstr(&match), "ct_next;");
6195 }
6196
6197 /* Higher priority rules are added for load-balancing in DNAT
6198 * table. For every match (on a VIP[:port]), we add two flows
6199 * via add_router_lb_flow(). One flow is for specific matching
6200 * on ct.new with an action of "ct_lb($targets);". The other
6201 * flow is for ct.est with an action of "ct_dnat;". */
6202 ds_clear(&actions);
6203 ds_put_format(&actions, "ct_lb(%s);", node->value);
6204
6205 ds_clear(&match);
6206 if (addr_family == AF_INET) {
6207 ds_put_format(&match, "ip && ip4.dst == %s",
6208 ip_address);
6209 } else {
6210 ds_put_format(&match, "ip && ip6.dst == %s",
6211 ip_address);
6212 }
6213 free(ip_address);
6214
6215 int prio = 110;
6216 bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ?
6217 true : false;
6218 if (port) {
6219 if (is_udp) {
6220 ds_put_format(&match, " && udp && udp.dst == %d",
6221 port);
6222 } else {
6223 ds_put_format(&match, " && tcp && tcp.dst == %d",
6224 port);
6225 }
6226 prio = 120;
6227 }
6228
6229 if (od->l3redirect_port) {
6230 ds_put_format(&match, " && is_chassis_resident(%s)",
6231 od->l3redirect_port->json_key);
6232 }
6233 add_router_lb_flow(lflows, od, &match, &actions, prio,
6234 lb_force_snat_ip, node->value, is_udp,
6235 addr_family);
6236 }
6237 }
6238 sset_destroy(&all_ips);
6239 }
6240
6241 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
6242 * response. */
6243 HMAP_FOR_EACH (op, key_node, ports) {
6244 if (!op->nbrp || op->nbrp->peer || !op->peer) {
6245 continue;
6246 }
6247
6248 if (!op->lrp_networks.n_ipv6_addrs) {
6249 continue;
6250 }
6251
6252 const char *address_mode = smap_get(
6253 &op->nbrp->ipv6_ra_configs, "address_mode");
6254
6255 if (!address_mode) {
6256 continue;
6257 }
6258 if (strcmp(address_mode, "slaac") &&
6259 strcmp(address_mode, "dhcpv6_stateful") &&
6260 strcmp(address_mode, "dhcpv6_stateless")) {
6261 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
6262 VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
6263 address_mode);
6264 continue;
6265 }
6266
6267 if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
6268 false)) {
6269 copy_ra_to_sb(op, address_mode);
6270 }
6271
6272 ds_clear(&match);
6273 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
6274 op->json_key);
6275 ds_clear(&actions);
6276
6277 const char *mtu_s = smap_get(
6278 &op->nbrp->ipv6_ra_configs, "mtu");
6279
6280 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
6281 uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
6282
6283 ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
6284 "addr_mode = \"%s\", slla = %s",
6285 address_mode, op->lrp_networks.ea_s);
6286 if (mtu > 0) {
6287 ds_put_format(&actions, ", mtu = %u", mtu);
6288 }
6289
6290 bool add_rs_response_flow = false;
6291
6292 for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6293 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
6294 continue;
6295 }
6296
6297 /* Add the prefix option if the address mode is slaac or
6298 * dhcpv6_stateless. */
6299 if (strcmp(address_mode, "dhcpv6_stateful")) {
6300 ds_put_format(&actions, ", prefix = %s/%u",
6301 op->lrp_networks.ipv6_addrs[i].network_s,
6302 op->lrp_networks.ipv6_addrs[i].plen);
6303 }
6304 add_rs_response_flow = true;
6305 }
6306
6307 if (add_rs_response_flow) {
6308 ds_put_cstr(&actions, "); next;");
6309 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50,
6310 ds_cstr(&match), ds_cstr(&actions));
6311 ds_clear(&actions);
6312 ds_clear(&match);
6313 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && "
6314 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
6315
6316 char ip6_str[INET6_ADDRSTRLEN + 1];
6317 struct in6_addr lla;
6318 in6_generate_lla(op->lrp_networks.ea, &lla);
6319 memset(ip6_str, 0, sizeof(ip6_str));
6320 ipv6_string_mapped(ip6_str, &lla);
6321 ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; "
6322 "ip6.dst = ip6.src; ip6.src = %s; "
6323 "outport = inport; flags.loopback = 1; "
6324 "output;",
6325 op->lrp_networks.ea_s, ip6_str);
6326 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50,
6327 ds_cstr(&match), ds_cstr(&actions));
6328 }
6329 }
6330
6331 /* Logical router ingress table 5, 6: RS responder, by default goto next.
6332 * (priority 0)*/
6333 HMAP_FOR_EACH (od, key_node, datapaths) {
6334 if (!od->nbr) {
6335 continue;
6336 }
6337
6338 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
6339 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
6340 }
6341
6342 /* Logical router ingress table 7: IP Routing.
6343 *
6344 * A packet that arrives at this table is an IP packet that should be
6345 * routed to the address in 'ip[46].dst'. This table sets outport to
6346 * the correct output port, eth.src to the output port's MAC
6347 * address, and '[xx]reg0' to the next-hop IP address (leaving
6348 * 'ip[46].dst', the packet’s final destination, unchanged), and
6349 * advances to the next table for ARP/ND resolution. */
6350 HMAP_FOR_EACH (op, key_node, ports) {
6351 if (!op->nbrp) {
6352 continue;
6353 }
6354
6355 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
6356 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
6357 op->lrp_networks.ipv4_addrs[i].network_s,
6358 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
6359 }
6360
6361 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6362 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
6363 op->lrp_networks.ipv6_addrs[i].network_s,
6364 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6365 }
6366 }
6367
6368 /* Convert the static routes to flows. */
6369 HMAP_FOR_EACH (od, key_node, datapaths) {
6370 if (!od->nbr) {
6371 continue;
6372 }
6373
6374 for (int i = 0; i < od->nbr->n_static_routes; i++) {
6375 const struct nbrec_logical_router_static_route *route;
6376
6377 route = od->nbr->static_routes[i];
6378 build_static_route_flow(lflows, od, ports, route);
6379 }
6380 }
6381
6382 /* XXX destination unreachable */
6383
6384 /* Logical router ingress table 8: ARP Resolution.
6385 *
6386 * Any packet that reaches this table is an IP packet whose next-hop IP
6387 * address is in reg0. (ip4.dst is the final destination.) This table
6388 * resolves the IP address in reg0 into an output port in outport and an
6389 * Ethernet address in eth.dst. */
6390 HMAP_FOR_EACH (op, key_node, ports) {
6391 if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
6392 continue;
6393 }
6394
6395 if (op->nbrp) {
6396 /* This is a logical router port. If next-hop IP address in
6397 * '[xx]reg0' matches IP address of this router port, then
6398 * the packet is intended to eventually be sent to this
6399 * logical port. Set the destination mac address using this
6400 * port's mac address.
6401 *
6402 * The packet is still in peer's logical pipeline. So the match
6403 * should be on peer's outport. */
6404 if (op->peer && op->nbrp->peer) {
6405 if (op->lrp_networks.n_ipv4_addrs) {
6406 ds_clear(&match);
6407 ds_put_format(&match, "outport == %s && reg0 == ",
6408 op->peer->json_key);
6409 op_put_v4_networks(&match, op, false);
6410
6411 ds_clear(&actions);
6412 ds_put_format(&actions, "eth.dst = %s; next;",
6413 op->lrp_networks.ea_s);
6414 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6415 100, ds_cstr(&match), ds_cstr(&actions));
6416 }
6417
6418 if (op->lrp_networks.n_ipv6_addrs) {
6419 ds_clear(&match);
6420 ds_put_format(&match, "outport == %s && xxreg0 == ",
6421 op->peer->json_key);
6422 op_put_v6_networks(&match, op);
6423
6424 ds_clear(&actions);
6425 ds_put_format(&actions, "eth.dst = %s; next;",
6426 op->lrp_networks.ea_s);
6427 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6428 100, ds_cstr(&match), ds_cstr(&actions));
6429 }
6430 }
6431 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
6432 /* This is a logical switch port that backs a VM or a container.
6433 * Extract its addresses. For each of the address, go through all
6434 * the router ports attached to the switch (to which this port
6435 * connects) and if the address in question is reachable from the
6436 * router port, add an ARP/ND entry in that router's pipeline. */
6437
6438 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
6439 const char *ea_s = op->lsp_addrs[i].ea_s;
6440 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
6441 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
6442 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6443 /* Get the Logical_Router_Port that the
6444 * Logical_Switch_Port is connected to, as
6445 * 'peer'. */
6446 const char *peer_name = smap_get(
6447 &op->od->router_ports[k]->nbsp->options,
6448 "router-port");
6449 if (!peer_name) {
6450 continue;
6451 }
6452
6453 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6454 if (!peer || !peer->nbrp) {
6455 continue;
6456 }
6457
6458 if (!find_lrp_member_ip(peer, ip_s)) {
6459 continue;
6460 }
6461
6462 ds_clear(&match);
6463 ds_put_format(&match, "outport == %s && reg0 == %s",
6464 peer->json_key, ip_s);
6465
6466 ds_clear(&actions);
6467 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6468 ovn_lflow_add(lflows, peer->od,
6469 S_ROUTER_IN_ARP_RESOLVE, 100,
6470 ds_cstr(&match), ds_cstr(&actions));
6471 }
6472 }
6473
6474 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
6475 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
6476 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6477 /* Get the Logical_Router_Port that the
6478 * Logical_Switch_Port is connected to, as
6479 * 'peer'. */
6480 const char *peer_name = smap_get(
6481 &op->od->router_ports[k]->nbsp->options,
6482 "router-port");
6483 if (!peer_name) {
6484 continue;
6485 }
6486
6487 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6488 if (!peer || !peer->nbrp) {
6489 continue;
6490 }
6491
6492 if (!find_lrp_member_ip(peer, ip_s)) {
6493 continue;
6494 }
6495
6496 ds_clear(&match);
6497 ds_put_format(&match, "outport == %s && xxreg0 == %s",
6498 peer->json_key, ip_s);
6499
6500 ds_clear(&actions);
6501 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6502 ovn_lflow_add(lflows, peer->od,
6503 S_ROUTER_IN_ARP_RESOLVE, 100,
6504 ds_cstr(&match), ds_cstr(&actions));
6505 }
6506 }
6507 }
6508 } else if (!strcmp(op->nbsp->type, "router")) {
6509 /* This is a logical switch port that connects to a router. */
6510
6511 /* The peer of this switch port is the router port for which
6512 * we need to add logical flows such that it can resolve
6513 * ARP entries for all the other router ports connected to
6514 * the switch in question. */
6515
6516 const char *peer_name = smap_get(&op->nbsp->options,
6517 "router-port");
6518 if (!peer_name) {
6519 continue;
6520 }
6521
6522 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6523 if (!peer || !peer->nbrp) {
6524 continue;
6525 }
6526
6527 for (size_t i = 0; i < op->od->n_router_ports; i++) {
6528 const char *router_port_name = smap_get(
6529 &op->od->router_ports[i]->nbsp->options,
6530 "router-port");
6531 struct ovn_port *router_port = ovn_port_find(ports,
6532 router_port_name);
6533 if (!router_port || !router_port->nbrp) {
6534 continue;
6535 }
6536
6537 /* Skip the router port under consideration. */
6538 if (router_port == peer) {
6539 continue;
6540 }
6541
6542 if (router_port->lrp_networks.n_ipv4_addrs) {
6543 ds_clear(&match);
6544 ds_put_format(&match, "outport == %s && reg0 == ",
6545 peer->json_key);
6546 op_put_v4_networks(&match, router_port, false);
6547
6548 ds_clear(&actions);
6549 ds_put_format(&actions, "eth.dst = %s; next;",
6550 router_port->lrp_networks.ea_s);
6551 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6552 100, ds_cstr(&match), ds_cstr(&actions));
6553 }
6554
6555 if (router_port->lrp_networks.n_ipv6_addrs) {
6556 ds_clear(&match);
6557 ds_put_format(&match, "outport == %s && xxreg0 == ",
6558 peer->json_key);
6559 op_put_v6_networks(&match, router_port);
6560
6561 ds_clear(&actions);
6562 ds_put_format(&actions, "eth.dst = %s; next;",
6563 router_port->lrp_networks.ea_s);
6564 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6565 100, ds_cstr(&match), ds_cstr(&actions));
6566 }
6567 }
6568 }
6569 }
6570
6571 HMAP_FOR_EACH (od, key_node, datapaths) {
6572 if (!od->nbr) {
6573 continue;
6574 }
6575
6576 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
6577 "get_arp(outport, reg0); next;");
6578
6579 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
6580 "get_nd(outport, xxreg0); next;");
6581 }
6582
6583 /* Logical router ingress table 9: Gateway redirect.
6584 *
6585 * For traffic with outport equal to the l3dgw_port
6586 * on a distributed router, this table redirects a subset
6587 * of the traffic to the l3redirect_port which represents
6588 * the central instance of the l3dgw_port.
6589 */
6590 HMAP_FOR_EACH (od, key_node, datapaths) {
6591 if (!od->nbr) {
6592 continue;
6593 }
6594 if (od->l3dgw_port && od->l3redirect_port) {
6595 /* For traffic with outport == l3dgw_port, if the
6596 * packet did not match any higher priority redirect
6597 * rule, then the traffic is redirected to the central
6598 * instance of the l3dgw_port. */
6599 ds_clear(&match);
6600 ds_put_format(&match, "outport == %s",
6601 od->l3dgw_port->json_key);
6602 ds_clear(&actions);
6603 ds_put_format(&actions, "outport = %s; next;",
6604 od->l3redirect_port->json_key);
6605 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
6606 ds_cstr(&match), ds_cstr(&actions));
6607
6608 /* If the Ethernet destination has not been resolved,
6609 * redirect to the central instance of the l3dgw_port.
6610 * Such traffic will be replaced by an ARP request or ND
6611 * Neighbor Solicitation in the ARP request ingress
6612 * table, before being redirected to the central instance.
6613 */
6614 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
6615 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
6616 ds_cstr(&match), ds_cstr(&actions));
6617 }
6618
6619 /* Packets are allowed by default. */
6620 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
6621 }
6622
6623 /* Logical router ingress table 10: ARP request.
6624 *
6625 * In the common case where the Ethernet destination has been resolved,
6626 * this table outputs the packet (priority 0). Otherwise, it composes
6627 * and sends an ARP/IPv6 NS request (priority 100). */
6628 HMAP_FOR_EACH (od, key_node, datapaths) {
6629 if (!od->nbr) {
6630 continue;
6631 }
6632
6633 for (int i = 0; i < od->nbr->n_static_routes; i++) {
6634 const struct nbrec_logical_router_static_route *route;
6635
6636 route = od->nbr->static_routes[i];
6637 struct in6_addr gw_ip6;
6638 unsigned int plen;
6639 char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
6640 if (error || plen != 128) {
6641 free(error);
6642 continue;
6643 }
6644
6645 ds_clear(&match);
6646 ds_put_format(&match, "eth.dst == 00:00:00:00:00:00 && "
6647 "ip6 && xxreg0 == %s", route->nexthop);
6648 struct in6_addr sn_addr;
6649 struct eth_addr eth_dst;
6650 in6_addr_solicited_node(&sn_addr, &gw_ip6);
6651 ipv6_multicast_to_ethernet(&eth_dst, &sn_addr);
6652
6653 char sn_addr_s[INET6_ADDRSTRLEN + 1];
6654 ipv6_string_mapped(sn_addr_s, &sn_addr);
6655
6656 ds_clear(&actions);
6657 ds_put_format(&actions,
6658 "nd_ns { "
6659 "eth.dst = "ETH_ADDR_FMT"; "
6660 "ip6.dst = %s; "
6661 "nd.target = %s; "
6662 "output; "
6663 "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
6664 route->nexthop);
6665 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
6666 ds_cstr(&match), ds_cstr(&actions));
6667 }
6668
6669 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6670 "eth.dst == 00:00:00:00:00:00",
6671 "arp { "
6672 "eth.dst = ff:ff:ff:ff:ff:ff; "
6673 "arp.spa = reg1; "
6674 "arp.tpa = reg0; "
6675 "arp.op = 1; " /* ARP request */
6676 "output; "
6677 "};");
6678 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6679 "eth.dst == 00:00:00:00:00:00",
6680 "nd_ns { "
6681 "nd.target = xxreg0; "
6682 "output; "
6683 "};");
6684 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
6685 }
6686
6687 /* Logical router egress table 1: Delivery (priority 100).
6688 *
6689 * Priority 100 rules deliver packets to enabled logical ports. */
6690 HMAP_FOR_EACH (op, key_node, ports) {
6691 if (!op->nbrp) {
6692 continue;
6693 }
6694
6695 if (!lrport_is_enabled(op->nbrp)) {
6696 /* Drop packets to disabled logical ports (since logical flow
6697 * tables are default-drop). */
6698 continue;
6699 }
6700
6701 if (op->derived) {
6702 /* No egress packets should be processed in the context of
6703 * a chassisredirect port. The chassisredirect port should
6704 * be replaced by the l3dgw port in the local output
6705 * pipeline stage before egress processing. */
6706 continue;
6707 }
6708
6709 ds_clear(&match);
6710 ds_put_format(&match, "outport == %s", op->json_key);
6711 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
6712 ds_cstr(&match), "output;");
6713 }
6714
6715 ds_destroy(&match);
6716 ds_destroy(&actions);
6717 }
6718
6719 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
6720 * constructing their contents based on the OVN_NB database. */
6721 static void
6722 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
6723 struct hmap *ports, struct hmap *port_groups)
6724 {
6725 struct hmap lflows = HMAP_INITIALIZER(&lflows);
6726 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
6727
6728 build_lswitch_flows(datapaths, ports, port_groups, &lflows, &mcgroups);
6729 build_lrouter_flows(datapaths, ports, &lflows);
6730
6731 /* Push changes to the Logical_Flow table to database. */
6732 const struct sbrec_logical_flow *sbflow, *next_sbflow;
6733 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
6734 struct ovn_datapath *od
6735 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
6736 if (!od) {
6737 sbrec_logical_flow_delete(sbflow);
6738 continue;
6739 }
6740
6741 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
6742 enum ovn_pipeline pipeline
6743 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
6744 struct ovn_lflow *lflow = ovn_lflow_find(
6745 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
6746 sbflow->priority, sbflow->match, sbflow->actions, sbflow->hash);
6747 if (lflow) {
6748 ovn_lflow_destroy(&lflows, lflow);
6749 } else {
6750 sbrec_logical_flow_delete(sbflow);
6751 }
6752 }
6753 struct ovn_lflow *lflow, *next_lflow;
6754 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
6755 const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
6756 uint8_t table = ovn_stage_get_table(lflow->stage);
6757
6758 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
6759 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
6760 sbrec_logical_flow_set_pipeline(sbflow, pipeline);
6761 sbrec_logical_flow_set_table_id(sbflow, table);
6762 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
6763 sbrec_logical_flow_set_match(sbflow, lflow->match);
6764 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
6765
6766 /* Trim the source locator lflow->where, which looks something like
6767 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
6768 * last slash, e.g. "ovn-northd.c:1234". */
6769 const char *slash = strrchr(lflow->where, '/');
6770 #if _WIN32
6771 const char *backslash = strrchr(lflow->where, '\\');
6772 if (!slash || backslash > slash) {
6773 slash = backslash;
6774 }
6775 #endif
6776 const char *where = slash ? slash + 1 : lflow->where;
6777
6778 struct smap ids = SMAP_INITIALIZER(&ids);
6779 smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
6780 smap_add(&ids, "source", where);
6781 if (lflow->stage_hint) {
6782 smap_add(&ids, "stage-hint", lflow->stage_hint);
6783 }
6784 sbrec_logical_flow_set_external_ids(sbflow, &ids);
6785 smap_destroy(&ids);
6786
6787 ovn_lflow_destroy(&lflows, lflow);
6788 }
6789 hmap_destroy(&lflows);
6790
6791 /* Push changes to the Multicast_Group table to database. */
6792 const struct sbrec_multicast_group *sbmc, *next_sbmc;
6793 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
6794 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
6795 sbmc->datapath);
6796 if (!od) {
6797 sbrec_multicast_group_delete(sbmc);
6798 continue;
6799 }
6800
6801 struct multicast_group group = { .name = sbmc->name,
6802 .key = sbmc->tunnel_key };
6803 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
6804 if (mc) {
6805 ovn_multicast_update_sbrec(mc, sbmc);
6806 ovn_multicast_destroy(&mcgroups, mc);
6807 } else {
6808 sbrec_multicast_group_delete(sbmc);
6809 }
6810 }
6811 struct ovn_multicast *mc, *next_mc;
6812 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
6813 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
6814 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
6815 sbrec_multicast_group_set_name(sbmc, mc->group->name);
6816 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
6817 ovn_multicast_update_sbrec(mc, sbmc);
6818 ovn_multicast_destroy(&mcgroups, mc);
6819 }
6820 hmap_destroy(&mcgroups);
6821 }
6822
6823 static void
6824 sync_address_set(struct northd_context *ctx, const char *name,
6825 const char **addrs, size_t n_addrs,
6826 struct shash *sb_address_sets)
6827 {
6828 const struct sbrec_address_set *sb_address_set;
6829 sb_address_set = shash_find_and_delete(sb_address_sets,
6830 name);
6831 if (!sb_address_set) {
6832 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
6833 sbrec_address_set_set_name(sb_address_set, name);
6834 }
6835
6836 sbrec_address_set_set_addresses(sb_address_set,
6837 addrs, n_addrs);
6838 }
6839
6840 /* Go through 'addresses' and add found IPv4 addresses to 'ipv4_addrs' and IPv6
6841 * addresses to 'ipv6_addrs'.
6842 */
6843 static void
6844 split_addresses(const char *addresses, struct svec *ipv4_addrs,
6845 struct svec *ipv6_addrs)
6846 {
6847 struct lport_addresses laddrs;
6848 extract_lsp_addresses(addresses, &laddrs);
6849 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
6850 svec_add(ipv4_addrs, laddrs.ipv4_addrs[k].addr_s);
6851 }
6852 for (size_t k = 0; k < laddrs.n_ipv6_addrs; k++) {
6853 svec_add(ipv6_addrs, laddrs.ipv6_addrs[k].addr_s);
6854 }
6855 destroy_lport_addresses(&laddrs);
6856 }
6857
6858 /* OVN_Southbound Address_Set table contains same records as in north
6859 * bound, plus the records generated from Port_Group table in north bound.
6860 *
6861 * There are 2 records generated from each port group, one for IPv4, and
6862 * one for IPv6, named in the format: <port group name>_ip4 and
6863 * <port group name>_ip6 respectively. MAC addresses are ignored.
6864 *
6865 * We always update OVN_Southbound to match the Address_Set and Port_Group
6866 * in OVN_Northbound, so that the address sets used in Logical_Flows in
6867 * OVN_Southbound is checked against the proper set.*/
6868 static void
6869 sync_address_sets(struct northd_context *ctx)
6870 {
6871 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
6872
6873 const struct sbrec_address_set *sb_address_set;
6874 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
6875 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
6876 }
6877
6878 /* sync port group generated address sets first */
6879 const struct nbrec_port_group *nb_port_group;
6880 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6881 struct svec ipv4_addrs = SVEC_EMPTY_INITIALIZER;
6882 struct svec ipv6_addrs = SVEC_EMPTY_INITIALIZER;
6883 for (size_t i = 0; i < nb_port_group->n_ports; i++) {
6884 for (size_t j = 0; j < nb_port_group->ports[i]->n_addresses; j++) {
6885 const char *addrs = nb_port_group->ports[i]->addresses[j];
6886 if (!is_dynamic_lsp_address(addrs)) {
6887 split_addresses(addrs, &ipv4_addrs, &ipv6_addrs);
6888 }
6889 }
6890 if (nb_port_group->ports[i]->dynamic_addresses) {
6891 split_addresses(nb_port_group->ports[i]->dynamic_addresses,
6892 &ipv4_addrs, &ipv6_addrs);
6893 }
6894 }
6895 char *ipv4_addrs_name = xasprintf("%s_ip4", nb_port_group->name);
6896 char *ipv6_addrs_name = xasprintf("%s_ip6", nb_port_group->name);
6897 sync_address_set(ctx, ipv4_addrs_name,
6898 /* "char **" is not compatible with "const char **" */
6899 (const char **)ipv4_addrs.names,
6900 ipv4_addrs.n, &sb_address_sets);
6901 sync_address_set(ctx, ipv6_addrs_name,
6902 /* "char **" is not compatible with "const char **" */
6903 (const char **)ipv6_addrs.names,
6904 ipv6_addrs.n, &sb_address_sets);
6905 free(ipv4_addrs_name);
6906 free(ipv6_addrs_name);
6907 svec_destroy(&ipv4_addrs);
6908 svec_destroy(&ipv6_addrs);
6909 }
6910
6911 /* sync user defined address sets, which may overwrite port group
6912 * generated address sets if same name is used */
6913 const struct nbrec_address_set *nb_address_set;
6914 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
6915 sync_address_set(ctx, nb_address_set->name,
6916 /* "char **" is not compatible with "const char **" */
6917 (const char **)nb_address_set->addresses,
6918 nb_address_set->n_addresses, &sb_address_sets);
6919 }
6920
6921 struct shash_node *node, *next;
6922 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
6923 sbrec_address_set_delete(node->data);
6924 shash_delete(&sb_address_sets, node);
6925 }
6926 shash_destroy(&sb_address_sets);
6927 }
6928
6929 /* Each port group in Port_Group table in OVN_Northbound has a corresponding
6930 * entry in Port_Group table in OVN_Southbound. In OVN_Northbound the entries
6931 * contains lport uuids, while in OVN_Southbound we store the lport names.
6932 */
6933 static void
6934 sync_port_groups(struct northd_context *ctx)
6935 {
6936 struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups);
6937
6938 const struct sbrec_port_group *sb_port_group;
6939 SBREC_PORT_GROUP_FOR_EACH (sb_port_group, ctx->ovnsb_idl) {
6940 shash_add(&sb_port_groups, sb_port_group->name, sb_port_group);
6941 }
6942
6943 const struct nbrec_port_group *nb_port_group;
6944 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6945 sb_port_group = shash_find_and_delete(&sb_port_groups,
6946 nb_port_group->name);
6947 if (!sb_port_group) {
6948 sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn);
6949 sbrec_port_group_set_name(sb_port_group, nb_port_group->name);
6950 }
6951
6952 const char **nb_port_names = xcalloc(nb_port_group->n_ports,
6953 sizeof *nb_port_names);
6954 int i;
6955 for (i = 0; i < nb_port_group->n_ports; i++) {
6956 nb_port_names[i] = nb_port_group->ports[i]->name;
6957 }
6958 sbrec_port_group_set_ports(sb_port_group,
6959 nb_port_names,
6960 nb_port_group->n_ports);
6961 free(nb_port_names);
6962 }
6963
6964 struct shash_node *node, *next;
6965 SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) {
6966 sbrec_port_group_delete(node->data);
6967 shash_delete(&sb_port_groups, node);
6968 }
6969 shash_destroy(&sb_port_groups);
6970 }
6971
/* Plain-value copy of one meter band, used so that northbound and southbound
 * bands can be sorted and compared in the same representation. */
struct band_entry {
    int64_t rate;
    int64_t burst_size;
    const char *action;     /* Borrowed string, not owned by the entry. */
};

/* qsort() comparator for arrays of 'struct band_entry'.
 *
 * Orders entries by descending 'rate', then by descending 'burst_size', and
 * finally by strcmp() order of 'action'. */
static int
band_cmp(const void *band1_, const void *band2_)
{
    const struct band_entry *lhs = band1_;
    const struct band_entry *rhs = band2_;

    if (lhs->rate > rhs->rate) {
        return -1;
    }
    if (lhs->rate < rhs->rate) {
        return 1;
    }

    if (lhs->burst_size > rhs->burst_size) {
        return -1;
    }
    if (lhs->burst_size < rhs->burst_size) {
        return 1;
    }

    return strcmp(lhs->action, rhs->action);
}
6992
6993 static bool
6994 bands_need_update(const struct nbrec_meter *nb_meter,
6995 const struct sbrec_meter *sb_meter)
6996 {
6997 if (nb_meter->n_bands != sb_meter->n_bands) {
6998 return true;
6999 }
7000
7001 /* A single band is the most common scenario, so speed up that
7002 * check. */
7003 if (nb_meter->n_bands == 1) {
7004 struct nbrec_meter_band *nb_band = nb_meter->bands[0];
7005 struct sbrec_meter_band *sb_band = sb_meter->bands[0];
7006
7007 return !(nb_band->rate == sb_band->rate
7008 && nb_band->burst_size == sb_band->burst_size
7009 && !strcmp(sb_band->action, nb_band->action));
7010 }
7011
7012 /* Place the Northbound entries in sorted order. */
7013 struct band_entry *nb_bands;
7014 nb_bands = xmalloc(sizeof *nb_bands * nb_meter->n_bands);
7015 for (size_t i = 0; i < nb_meter->n_bands; i++) {
7016 struct nbrec_meter_band *nb_band = nb_meter->bands[i];
7017
7018 nb_bands[i].rate = nb_band->rate;
7019 nb_bands[i].burst_size = nb_band->burst_size;
7020 nb_bands[i].action = nb_band->action;
7021 }
7022 qsort(nb_bands, nb_meter->n_bands, sizeof *nb_bands, band_cmp);
7023
7024 /* Place the Southbound entries in sorted order. */
7025 struct band_entry *sb_bands;
7026 sb_bands = xmalloc(sizeof *sb_bands * sb_meter->n_bands);
7027 for (size_t i = 0; i < sb_meter->n_bands; i++) {
7028 struct sbrec_meter_band *sb_band = sb_meter->bands[i];
7029
7030 sb_bands[i].rate = sb_band->rate;
7031 sb_bands[i].burst_size = sb_band->burst_size;
7032 sb_bands[i].action = sb_band->action;
7033 }
7034 qsort(sb_bands, sb_meter->n_bands, sizeof *sb_bands, band_cmp);
7035
7036 bool need_update = false;
7037 for (size_t i = 0; i < nb_meter->n_bands; i++) {
7038 if (nb_bands[i].rate != sb_bands[i].rate
7039 || nb_bands[i].burst_size != sb_bands[i].burst_size
7040 || strcmp(nb_bands[i].action, sb_bands[i].action)) {
7041 need_update = true;
7042 goto done;
7043 }
7044 }
7045
7046 done:
7047 free(nb_bands);
7048 free(sb_bands);
7049
7050 return need_update;
7051 }
7052
7053 /* Each entry in the Meter and Meter_Band tables in OVN_Northbound have
7054 * a corresponding entries in the Meter and Meter_Band tables in
7055 * OVN_Southbound.
7056 */
7057 static void
7058 sync_meters(struct northd_context *ctx)
7059 {
7060 struct shash sb_meters = SHASH_INITIALIZER(&sb_meters);
7061
7062 const struct sbrec_meter *sb_meter;
7063 SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) {
7064 shash_add(&sb_meters, sb_meter->name, sb_meter);
7065 }
7066
7067 const struct nbrec_meter *nb_meter;
7068 NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) {
7069 bool new_sb_meter = false;
7070
7071 sb_meter = shash_find_and_delete(&sb_meters, nb_meter->name);
7072 if (!sb_meter) {
7073 sb_meter = sbrec_meter_insert(ctx->ovnsb_txn);
7074 sbrec_meter_set_name(sb_meter, nb_meter->name);
7075 new_sb_meter = true;
7076 }
7077
7078 if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) {
7079 struct sbrec_meter_band **sb_bands;
7080 sb_bands = xcalloc(nb_meter->n_bands, sizeof *sb_bands);
7081 for (size_t i = 0; i < nb_meter->n_bands; i++) {
7082 const struct nbrec_meter_band *nb_band = nb_meter->bands[i];
7083
7084 sb_bands[i] = sbrec_meter_band_insert(ctx->ovnsb_txn);
7085
7086 sbrec_meter_band_set_action(sb_bands[i], nb_band->action);
7087 sbrec_meter_band_set_rate(sb_bands[i], nb_band->rate);
7088 sbrec_meter_band_set_burst_size(sb_bands[i],
7089 nb_band->burst_size);
7090 }
7091 sbrec_meter_set_bands(sb_meter, sb_bands, nb_meter->n_bands);
7092 free(sb_bands);
7093 }
7094
7095 sbrec_meter_set_unit(sb_meter, nb_meter->unit);
7096 }
7097
7098 struct shash_node *node, *next;
7099 SHASH_FOR_EACH_SAFE (node, next, &sb_meters) {
7100 sbrec_meter_delete(node->data);
7101 shash_delete(&sb_meters, node);
7102 }
7103 shash_destroy(&sb_meters);
7104 }
7105
/*
 * struct 'dns_info' is used to sync the DNS records between the OVN
 * Northbound db and the Southbound db.  One instance exists per northbound
 * DNS row while sync_dns_entries() runs; instances are keyed in an hmap by
 * the hash of the northbound row's UUID.
 */
struct dns_info {
    struct hmap_node hmap_node;     /* Node in the per-run DNS hmap. */
    const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns; /* DNS record in the Southbound db. */

    /* Datapaths with which the DNS entry is associated.  The array is grown
     * with xrealloc() and freed by sync_dns_entries(). */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;                   /* Number of elements in 'sbs'. */
};
7119
7120 static inline struct dns_info *
7121 get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
7122 {
7123 struct dns_info *dns_info;
7124 size_t hash = uuid_hash(uuid);
7125 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
7126 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
7127 return dns_info;
7128 }
7129 }
7130
7131 return NULL;
7132 }
7133
7134 static void
7135 sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
7136 {
7137 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
7138 struct ovn_datapath *od;
7139 HMAP_FOR_EACH (od, key_node, datapaths) {
7140 if (!od->nbs || !od->nbs->n_dns_records) {
7141 continue;
7142 }
7143
7144 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
7145 struct dns_info *dns_info = get_dns_info_from_hmap(
7146 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
7147 if (!dns_info) {
7148 size_t hash = uuid_hash(
7149 &od->nbs->dns_records[i]->header_.uuid);
7150 dns_info = xzalloc(sizeof *dns_info);;
7151 dns_info->nb_dns = od->nbs->dns_records[i];
7152 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
7153 }
7154
7155 dns_info->n_sbs++;
7156 dns_info->sbs = xrealloc(dns_info->sbs,
7157 dns_info->n_sbs * sizeof *dns_info->sbs);
7158 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
7159 }
7160 }
7161
7162 const struct sbrec_dns *sbrec_dns, *next;
7163 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
7164 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
7165 struct uuid dns_uuid;
7166 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
7167 sbrec_dns_delete(sbrec_dns);
7168 continue;
7169 }
7170
7171 struct dns_info *dns_info =
7172 get_dns_info_from_hmap(&dns_map, &dns_uuid);
7173 if (dns_info) {
7174 dns_info->sb_dns = sbrec_dns;
7175 } else {
7176 sbrec_dns_delete(sbrec_dns);
7177 }
7178 }
7179
7180 struct dns_info *dns_info;
7181 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
7182 if (!dns_info->sb_dns) {
7183 sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
7184 dns_info->sb_dns = sbrec_dns;
7185 char *dns_id = xasprintf(
7186 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
7187 const struct smap external_ids =
7188 SMAP_CONST1(&external_ids, "dns_id", dns_id);
7189 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
7190 free(dns_id);
7191 }
7192
7193 /* Set the datapaths and records. If nothing has changed, then
7194 * this will be a no-op.
7195 */
7196 sbrec_dns_set_datapaths(
7197 dns_info->sb_dns,
7198 (struct sbrec_datapath_binding **)dns_info->sbs,
7199 dns_info->n_sbs);
7200 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
7201 free(dns_info->sbs);
7202 free(dns_info);
7203 }
7204 hmap_destroy(&dns_map);
7205 }
7206
7207
7208 \f
7209 static void
7210 ovnnb_db_run(struct northd_context *ctx,
7211 struct ovsdb_idl_index *sbrec_chassis_by_name,
7212 struct ovsdb_idl_loop *sb_loop)
7213 {
7214 if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
7215 return;
7216 }
7217 struct hmap datapaths, ports, port_groups;
7218 build_datapaths(ctx, &datapaths);
7219 build_ports(ctx, sbrec_chassis_by_name, &datapaths, &ports);
7220 build_ipam(&datapaths, &ports);
7221 build_port_group_lswitches(ctx, &port_groups, &ports);
7222 build_lflows(ctx, &datapaths, &ports, &port_groups);
7223
7224 sync_address_sets(ctx);
7225 sync_port_groups(ctx);
7226 sync_meters(ctx);
7227 sync_dns_entries(ctx, &datapaths);
7228
7229 struct ovn_port_group *pg, *next_pg;
7230 HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
7231 ovn_port_group_destroy(&port_groups, pg);
7232 }
7233 hmap_destroy(&port_groups);
7234
7235 struct ovn_datapath *dp, *next_dp;
7236 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
7237 ovn_datapath_destroy(&datapaths, dp);
7238 }
7239 hmap_destroy(&datapaths);
7240
7241 struct ovn_port *port, *next_port;
7242 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
7243 ovn_port_destroy(&ports, port);
7244 }
7245 hmap_destroy(&ports);
7246
7247 /* Sync ipsec configuration.
7248 * Copy nb_cfg from northbound to southbound database.
7249 * Also set up to update sb_cfg once our southbound transaction commits. */
7250 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
7251 if (!nb) {
7252 nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
7253 }
7254 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
7255 if (!sb) {
7256 sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
7257 }
7258 if (nb->ipsec != sb->ipsec) {
7259 sbrec_sb_global_set_ipsec(sb, nb->ipsec);
7260 }
7261 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
7262 sbrec_sb_global_set_options(sb, &nb->options);
7263 sb_loop->next_cfg = nb->nb_cfg;
7264
7265 const char *mac_addr_prefix = smap_get(&nb->options, "mac_prefix");
7266 if (mac_addr_prefix) {
7267 struct eth_addr addr;
7268
7269 memset(&addr, 0, sizeof addr);
7270 if (ovs_scan(mac_addr_prefix, "%"SCNx8":%"SCNx8":%"SCNx8,
7271 &addr.ea[0], &addr.ea[1], &addr.ea[2])) {
7272 mac_prefix = addr;
7273 }
7274 }
7275
7276 cleanup_macam(&macam);
7277 }
7278
7279 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
7280 * this column is not empty, it means we need to set the corresponding logical
7281 * port as 'up' in the northbound DB. */
7282 static void
7283 update_logical_port_status(struct northd_context *ctx)
7284 {
7285 struct hmap lports_hmap;
7286 const struct sbrec_port_binding *sb;
7287 const struct nbrec_logical_switch_port *nbsp;
7288
7289 struct lport_hash_node {
7290 struct hmap_node node;
7291 const struct nbrec_logical_switch_port *nbsp;
7292 } *hash_node;
7293
7294 hmap_init(&lports_hmap);
7295
7296 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
7297 hash_node = xzalloc(sizeof *hash_node);
7298 hash_node->nbsp = nbsp;
7299 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
7300 }
7301
7302 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
7303 nbsp = NULL;
7304 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
7305 hash_string(sb->logical_port, 0),
7306 &lports_hmap) {
7307 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
7308 nbsp = hash_node->nbsp;
7309 break;
7310 }
7311 }
7312
7313 if (!nbsp) {
7314 /* The logical port doesn't exist for this port binding. This can
7315 * happen under normal circumstances when ovn-northd hasn't gotten
7316 * around to pruning the Port_Binding yet. */
7317 continue;
7318 }
7319
7320 bool up = (sb->chassis || !strcmp(nbsp->type, "router"));
7321 if (!nbsp->up || *nbsp->up != up) {
7322 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
7323 }
7324 }
7325
7326 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
7327 free(hash_node);
7328 }
7329 hmap_destroy(&lports_hmap);
7330 }
7331
/* DHCPv4 options that check_and_add_supported_dhcp_opts_to_sb_db() keeps in
 * the southbound DHCP_Options table.
 *
 * The array is not const because that function links each element's
 * 'hmap_node' into a temporary hmap. */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2,
    DHCP_OPT_WPAD,
};
7359
/* DHCPv6 options that check_and_add_supported_dhcpv6_opts_to_sb_db() keeps in
 * the southbound DHCPv6_Options table.
 *
 * The array is not const because that function links each element's
 * 'hmap_node' into a temporary hmap. */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
7366
7367 static void
7368 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
7369 {
7370 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
7371 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
7372 sizeof(supported_dhcp_opts[0])); i++) {
7373 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
7374 dhcp_opt_hash(supported_dhcp_opts[i].name));
7375 }
7376
7377 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
7378 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
7379 struct gen_opts_map *dhcp_opt =
7380 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
7381 if (dhcp_opt) {
7382 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
7383 } else {
7384 sbrec_dhcp_options_delete(opt_row);
7385 }
7386 }
7387
7388 struct gen_opts_map *opt;
7389 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
7390 struct sbrec_dhcp_options *sbrec_dhcp_option =
7391 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
7392 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
7393 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
7394 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
7395 }
7396
7397 hmap_destroy(&dhcp_opts_to_add);
7398 }
7399
7400 static void
7401 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
7402 {
7403 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
7404 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
7405 sizeof(supported_dhcpv6_opts[0])); i++) {
7406 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
7407 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
7408 }
7409
7410 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
7411 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
7412 struct gen_opts_map *dhcp_opt =
7413 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
7414 if (dhcp_opt) {
7415 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
7416 } else {
7417 sbrec_dhcpv6_options_delete(opt_row);
7418 }
7419 }
7420
7421 struct gen_opts_map *opt;
7422 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
7423 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
7424 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
7425 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
7426 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
7427 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
7428 }
7429
7430 hmap_destroy(&dhcpv6_opts_to_add);
7431 }
7432
/* Column-name lists referenced by the entries of rbac_perm_cfg[].  For each
 * southbound table there is an "auth" list, which is written to the
 * 'authorization' column of the RBAC_Permission row, and an "update" list,
 * which is written to its 'update' column.
 *
 * NOTE(review): the Port_Binding and MAC_Binding "auth" lists hold a single
 * empty string; presumably this means no per-client authorization column is
 * checked -- confirm against the OVSDB RBAC documentation. */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};
7452
/* Expected contents of the southbound RBAC_Permission table, one entry per
 * table name.  The array ends with an entry whose 'table' is NULL; loops over
 * the array stop there. */
static struct rbac_perm_cfg {
    const char *table;      /* Compared against the row's 'table' column. */
    const char **auth;      /* Expected 'authorization' column values. */
    int n_auth;             /* Number of elements in 'auth'. */
    bool insdel;            /* Expected 'insert_delete' column value. */
    const char **update;    /* Expected 'update' column values. */
    int n_update;           /* Number of elements in 'update'. */

    /* Database row found to match this entry.  Reset to NULL at the start of
     * check_and_update_rbac() and set by ovn_rbac_validate_perm(). */
    const struct sbrec_rbac_permission *row;
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        /* Terminator. */
        .table = NULL,
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
7504
7505 static bool
7506 ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
7507 {
7508 struct rbac_perm_cfg *pcfg;
7509 int i, j, n_found;
7510
7511 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7512 if (!strcmp(perm->table, pcfg->table)) {
7513 break;
7514 }
7515 }
7516 if (!pcfg->table) {
7517 return false;
7518 }
7519 if (perm->n_authorization != pcfg->n_auth ||
7520 perm->n_update != pcfg->n_update) {
7521 return false;
7522 }
7523 if (perm->insert_delete != pcfg->insdel) {
7524 return false;
7525 }
7526 /* verify perm->authorization vs. pcfg->auth */
7527 n_found = 0;
7528 for (i = 0; i < pcfg->n_auth; i++) {
7529 for (j = 0; j < perm->n_authorization; j++) {
7530 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
7531 n_found++;
7532 break;
7533 }
7534 }
7535 }
7536 if (n_found != pcfg->n_auth) {
7537 return false;
7538 }
7539
7540 /* verify perm->update vs. pcfg->update */
7541 n_found = 0;
7542 for (i = 0; i < pcfg->n_update; i++) {
7543 for (j = 0; j < perm->n_update; j++) {
7544 if (!strcmp(pcfg->update[i], perm->update[j])) {
7545 n_found++;
7546 break;
7547 }
7548 }
7549 }
7550 if (n_found != pcfg->n_update) {
7551 return false;
7552 }
7553
7554 /* Success, db state matches expected state */
7555 pcfg->row = perm;
7556 return true;
7557 }
7558
7559 static void
7560 ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
7561 struct northd_context *ctx,
7562 const struct sbrec_rbac_role *rbac_role)
7563 {
7564 struct sbrec_rbac_permission *rbac_perm;
7565
7566 rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
7567 sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
7568 sbrec_rbac_permission_set_authorization(rbac_perm,
7569 pcfg->auth,
7570 pcfg->n_auth);
7571 sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
7572 sbrec_rbac_permission_set_update(rbac_perm,
7573 pcfg->update,
7574 pcfg->n_update);
7575 sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
7576 rbac_perm);
7577 }
7578
7579 static void
7580 check_and_update_rbac(struct northd_context *ctx)
7581 {
7582 const struct sbrec_rbac_role *rbac_role = NULL;
7583 const struct sbrec_rbac_permission *perm_row, *perm_next;
7584 const struct sbrec_rbac_role *role_row, *role_row_next;
7585 struct rbac_perm_cfg *pcfg;
7586
7587 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7588 pcfg->row = NULL;
7589 }
7590
7591 SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
7592 if (!ovn_rbac_validate_perm(perm_row)) {
7593 sbrec_rbac_permission_delete(perm_row);
7594 }
7595 }
7596 SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
7597 if (strcmp(role_row->name, "ovn-controller")) {
7598 sbrec_rbac_role_delete(role_row);
7599 } else {
7600 rbac_role = role_row;
7601 }
7602 }
7603
7604 if (!rbac_role) {
7605 rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
7606 sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
7607 }
7608
7609 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7610 if (!pcfg->row) {
7611 ovn_rbac_create_perm(pcfg, ctx, rbac_role);
7612 }
7613 }
7614 }
7615
7616 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
7617 static void
7618 update_northbound_cfg(struct northd_context *ctx,
7619 struct ovsdb_idl_loop *sb_loop)
7620 {
7621 /* Update northbound sb_cfg if appropriate. */
7622 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
7623 int64_t sb_cfg = sb_loop->cur_cfg;
7624 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
7625 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
7626 }
7627
7628 /* Update northbound hv_cfg if appropriate. */
7629 if (nbg) {
7630 /* Find minimum nb_cfg among all chassis. */
7631 const struct sbrec_chassis *chassis;
7632 int64_t hv_cfg = nbg->nb_cfg;
7633 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
7634 if (chassis->nb_cfg < hv_cfg) {
7635 hv_cfg = chassis->nb_cfg;
7636 }
7637 }
7638
7639 /* Update hv_cfg. */
7640 if (nbg->hv_cfg != hv_cfg) {
7641 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
7642 }
7643 }
7644 }
7645
7646 /* Handle a fairly small set of changes in the southbound database. */
7647 static void
7648 ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
7649 {
7650 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
7651 return;
7652 }
7653
7654 update_logical_port_status(ctx);
7655 update_northbound_cfg(ctx, sb_loop);
7656 }
7657 \f
7658 static void
7659 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
7660 {
7661 enum {
7662 DAEMON_OPTION_ENUMS,
7663 VLOG_OPTION_ENUMS,
7664 SSL_OPTION_ENUMS,
7665 };
7666 static const struct option long_options[] = {
7667 {"ovnsb-db", required_argument, NULL, 'd'},
7668 {"ovnnb-db", required_argument, NULL, 'D'},
7669 {"unixctl", required_argument, NULL, 'u'},
7670 {"help", no_argument, NULL, 'h'},
7671 {"options", no_argument, NULL, 'o'},
7672 {"version", no_argument, NULL, 'V'},
7673 DAEMON_LONG_OPTIONS,
7674 VLOG_LONG_OPTIONS,
7675 STREAM_SSL_LONG_OPTIONS,
7676 {NULL, 0, NULL, 0},
7677 };
7678 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
7679
7680 for (;;) {
7681 int c;
7682
7683 c = getopt_long(argc, argv, short_options, long_options, NULL);
7684 if (c == -1) {
7685 break;
7686 }
7687
7688 switch (c) {
7689 DAEMON_OPTION_HANDLERS;
7690 VLOG_OPTION_HANDLERS;
7691 STREAM_SSL_OPTION_HANDLERS;
7692
7693 case 'd':
7694 ovnsb_db = optarg;
7695 break;
7696
7697 case 'D':
7698 ovnnb_db = optarg;
7699 break;
7700
7701 case 'u':
7702 unixctl_path = optarg;
7703 break;
7704
7705 case 'h':
7706 usage();
7707 exit(EXIT_SUCCESS);
7708
7709 case 'o':
7710 ovs_cmdl_print_options(long_options);
7711 exit(EXIT_SUCCESS);
7712
7713 case 'V':
7714 ovs_print_version(0, 0);
7715 exit(EXIT_SUCCESS);
7716
7717 default:
7718 break;
7719 }
7720 }
7721
7722 if (!ovnsb_db) {
7723 ovnsb_db = default_sb_db();
7724 }
7725
7726 if (!ovnnb_db) {
7727 ovnnb_db = default_nb_db();
7728 }
7729
7730 free(short_options);
7731 }
7732
/* Registers 'column' with 'idl' via ovsdb_idl_add_column() and then calls
 * ovsdb_idl_omit_alert() for it.  The add must come first, so the two calls
 * are kept in this order. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
7740
7741 int
7742 main(int argc, char *argv[])
7743 {
7744 int res = EXIT_SUCCESS;
7745 struct unixctl_server *unixctl;
7746 int retval;
7747 bool exiting;
7748
7749 fatal_ignore_sigpipe();
7750 ovs_cmdl_proctitle_init(argc, argv);
7751 set_program_name(argv[0]);
7752 service_start(&argc, &argv);
7753 parse_options(argc, argv);
7754
7755 daemonize_start(false);
7756
7757 retval = unixctl_server_create(unixctl_path, &unixctl);
7758 if (retval) {
7759 exit(EXIT_FAILURE);
7760 }
7761 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
7762
7763 daemonize_complete();
7764
7765 /* We want to detect (almost) all changes to the ovn-nb db. */
7766 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
7767 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
7768 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
7769 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);
7770
7771 /* We want to detect only selected changes to the ovn-sb db. */
7772 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
7773 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
7774
7775 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
7776 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);
7777 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_options);
7778 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_ipsec);
7779
7780 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
7781 add_column_noalert(ovnsb_idl_loop.idl,
7782 &sbrec_logical_flow_col_logical_datapath);
7783 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
7784 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
7785 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
7786 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
7787 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
7788
7789 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
7790 add_column_noalert(ovnsb_idl_loop.idl,
7791 &sbrec_multicast_group_col_datapath);
7792 add_column_noalert(ovnsb_idl_loop.idl,
7793 &sbrec_multicast_group_col_tunnel_key);
7794 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
7795 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
7796
7797 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
7798 add_column_noalert(ovnsb_idl_loop.idl,
7799 &sbrec_datapath_binding_col_tunnel_key);
7800 add_column_noalert(ovnsb_idl_loop.idl,
7801 &sbrec_datapath_binding_col_external_ids);
7802
7803 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
7804 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
7805 add_column_noalert(ovnsb_idl_loop.idl,
7806 &sbrec_port_binding_col_logical_port);
7807 add_column_noalert(ovnsb_idl_loop.idl,
7808 &sbrec_port_binding_col_tunnel_key);
7809 add_column_noalert(ovnsb_idl_loop.idl,
7810 &sbrec_port_binding_col_parent_port);
7811 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
7812 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
7813 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
7814 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
7815 add_column_noalert(ovnsb_idl_loop.idl,
7816 &sbrec_port_binding_col_nat_addresses);
7817 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
7818 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7819 &sbrec_port_binding_col_gateway_chassis);
7820 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7821 &sbrec_gateway_chassis_col_chassis);
7822 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
7823 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7824 &sbrec_gateway_chassis_col_priority);
7825 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7826 &sbrec_gateway_chassis_col_external_ids);
7827 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7828 &sbrec_gateway_chassis_col_options);
7829 add_column_noalert(ovnsb_idl_loop.idl,
7830 &sbrec_port_binding_col_external_ids);
7831 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
7832 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
7833 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
7834 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
7835 add_column_noalert(ovnsb_idl_loop.idl,
7836 &sbrec_mac_binding_col_logical_port);
7837 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
7838 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
7839 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
7840 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
7841 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
7842 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
7843 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
7844 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
7845 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
7846 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
7847 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
7848 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_group);
7849 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_name);
7850 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_ports);
7851
7852 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
7853 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
7854 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
7855 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);
7856
7857 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
7858 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
7859 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);
7860
7861 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
7862 add_column_noalert(ovnsb_idl_loop.idl,
7863 &sbrec_rbac_permission_col_table);
7864 add_column_noalert(ovnsb_idl_loop.idl,
7865 &sbrec_rbac_permission_col_authorization);
7866 add_column_noalert(ovnsb_idl_loop.idl,
7867 &sbrec_rbac_permission_col_insert_delete);
7868 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);
7869
7870 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_meter);
7871 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_name);
7872 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_unit);
7873 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_bands);
7874
7875 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_meter_band);
7876 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_action);
7877 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_rate);
7878 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_burst_size);
7879
7880 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
7881 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
7882 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
7883
7884 struct ovsdb_idl_index *sbrec_chassis_by_name
7885 = chassis_index_create(ovnsb_idl_loop.idl);
7886
7887 /* Ensure that only a single ovn-northd is active in the deployment by
7888 * acquiring a lock called "ovn_northd" on the southbound database
7889 * and then only performing DB transactions if the lock is held. */
7890 ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
7891 bool had_lock = false;
7892
7893 /* Main loop. */
7894 exiting = false;
7895 while (!exiting) {
7896 struct northd_context ctx = {
7897 .ovnnb_idl = ovnnb_idl_loop.idl,
7898 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
7899 .ovnsb_idl = ovnsb_idl_loop.idl,
7900 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
7901 };
7902
7903 if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7904 VLOG_INFO("ovn-northd lock acquired. "
7905 "This ovn-northd instance is now active.");
7906 had_lock = true;
7907 } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7908 VLOG_INFO("ovn-northd lock lost. "
7909 "This ovn-northd instance is now on standby.");
7910 had_lock = false;
7911 }
7912
7913 if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7914 ovnnb_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
7915 ovnsb_db_run(&ctx, &ovnsb_idl_loop);
7916 if (ctx.ovnsb_txn) {
7917 check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
7918 check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
7919 check_and_update_rbac(&ctx);
7920 }
7921 }
7922
7923 unixctl_server_run(unixctl);
7924 unixctl_server_wait(unixctl);
7925 if (exiting) {
7926 poll_immediate_wake();
7927 }
7928 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
7929 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
7930
7931 poll_block();
7932 if (should_service_stop()) {
7933 exiting = true;
7934 }
7935 }
7936
7937 unixctl_server_destroy(unixctl);
7938 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
7939 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
7940 service_stop();
7941
7942 exit(res);
7943 }
7944
7945 static void
7946 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
7947 const char *argv[] OVS_UNUSED, void *exiting_)
7948 {
7949 bool *exiting = exiting_;
7950 *exiting = true;
7951
7952 unixctl_command_reply(conn, NULL);
7953 }