]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
OVN: configure L2 address according to the used IP address
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
38 #include "packets.h"
39 #include "openvswitch/poll-loop.h"
40 #include "smap.h"
41 #include "sset.h"
42 #include "svec.h"
43 #include "stream.h"
44 #include "stream-ssl.h"
45 #include "unixctl.h"
46 #include "util.h"
47 #include "uuid.h"
48 #include "openvswitch/vlog.h"
49
50 VLOG_DEFINE_THIS_MODULE(ovn_northd);
51
52 static unixctl_cb_func ovn_northd_exit;
53
/* Bundles the IDL handle and the transaction pointer for each of the OVN
 * northbound and southbound databases, so that they can be passed around as
 * a single argument. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database IDL. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database IDL. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Southbound transaction. */
};
60
/* Database locations and control socket path.
 * NOTE(review): presumably filled in from the --ovnnb-db, --ovnsb-db and
 * --unixctl options listed by usage(); the option parser is not visible
 * here, confirm against it. */
static const char *ovnnb_db;
static const char *ovnsb_db;
static const char *unixctl_path;

/* Default upper 24 bits of MAC addresses handed out by the MAC allocator;
 * used whenever 'mac_prefix' is all-zeros. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
/* Mask covering the lower 24 bits of a MAC address, i.e. the part that the
 * MAC allocator varies underneath the prefix. */
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct macam_node"s, that holds
 * the MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);
/* When nonzero, replaces MAC_ADDR_PREFIX as the prefix for allocated MACs. */
static struct eth_addr mac_prefix;

#define MAX_OVN_TAGS 4096
74 \f
75 /* Pipeline stages. */
76
/* The two pipelines in an OVN logical flow table.
 *
 * The numeric value ends up in bit 8 of an "enum ovn_stage" (see
 * OVN_STAGE_BUILD()), so P_IN must stay 0 and P_OUT must stay 1. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};
82
/* The two purposes for which ovn-northd uses OVN logical datapaths.
 *
 * The numeric value is shifted into bit 9 and up of an "enum ovn_stage" by
 * OVN_STAGE_BUILD(). */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
88
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Layout of the result: bits 0-7 hold TABLE, bit 8 holds PIPELINE and the
 * bits from 9 upward hold DP_TYPE.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
95
/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * PIPELINE_STAGES is an X-macro list: each user defines PIPELINE_STAGE()
 * with five arguments (datapath type, pipeline, stage name, table number,
 * printable name), expands PIPELINE_STAGES, and undefines PIPELINE_STAGE()
 * again.  Below it generates the enumerators themselves. */
enum ovn_stage {
#define PIPELINE_STAGES                                                   \
    /* Logical switch ingress stages. */                                  \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")   \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")       \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")        \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")           \
    PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,       7, "ls_in_qos_mark")      \
    PIPELINE_STAGE(SWITCH, IN,  QOS_METER,      8, "ls_in_qos_meter")     \
    PIPELINE_STAGE(SWITCH, IN,  LB,             9, "ls_in_lb")            \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,      10, "ls_in_stateful")      \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,    11, "ls_in_arp_rsp")       \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  12, "ls_in_dhcp_options")  \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 13, "ls_in_dhcp_response") \
    PIPELINE_STAGE(SWITCH, IN,  DNS_LOOKUP,    14, "ls_in_dns_lookup")    \
    PIPELINE_STAGE(SWITCH, IN,  DNS_RESPONSE,  15, "ls_in_dns_response")  \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       16, "ls_in_l2_lkup")       \
                                                                          \
    /* Logical switch egress stages. */                                   \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")         \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")        \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful")   \
    PIPELINE_STAGE(SWITCH, OUT, LB,           3, "ls_out_lb")             \
    PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")            \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK,     5, "ls_out_qos_mark")       \
    PIPELINE_STAGE(SWITCH, OUT, QOS_METER,    6, "ls_out_qos_meter")      \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     7, "ls_out_stateful")       \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  8, "ls_out_port_sec_ip")    \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  9, "ls_out_port_sec_l2")    \
                                                                          \
    /* Logical router ingress stages. */                                  \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,      0, "lr_in_admission")     \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,       1, "lr_in_ip_input")      \
    PIPELINE_STAGE(ROUTER, IN,  DEFRAG,         2, "lr_in_defrag")        \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,         3, "lr_in_unsnat")        \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,           4, "lr_in_dnat")          \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_OPTIONS,  5, "lr_in_nd_ra_options") \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,     7, "lr_in_ip_routing")    \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,    8, "lr_in_arp_resolve")   \
    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,    9, "lr_in_gw_redirect")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,   10, "lr_in_arp_request")   \
                                                                          \
    /* Logical router egress stages. */                                   \
    PIPELINE_STAGE(ROUTER, OUT, UNDNAT,    0, "lr_out_undnat")            \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,      1, "lr_out_snat")              \
    PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP,  2, "lr_out_egr_loop")          \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,  3, "lr_out_delivery")

    /* Each list entry becomes one enumerator whose value is the packed
     * (datapath type, pipeline, table) triple. */
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
161
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
167
/* Register definitions specific to switches.
 *
 * Each macro expands to the textual name of a single register bit, in the
 * "regN[bit]" form.
 * NOTE(review): REGBIT_ND_RA_OPTS_RESULT is listed here although the
 * ND_RA_OPTIONS/ND_RA_RESPONSE stages above are router stages; confirm
 * whether it belongs in this group. */
#define REGBIT_CONNTRACK_DEFRAG  "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT  "reg0[1]"
#define REGBIT_CONNTRACK_NAT     "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT  "reg0[3]"
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
#define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Indicate that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets with source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
183
184 /* Returns an "enum ovn_stage" built from the arguments. */
185 static enum ovn_stage
186 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
187 uint8_t table)
188 {
189 return OVN_STAGE_BUILD(dp_type, pipeline, table);
190 }
191
192 /* Returns the pipeline to which 'stage' belongs. */
193 static enum ovn_pipeline
194 ovn_stage_get_pipeline(enum ovn_stage stage)
195 {
196 return (stage >> 8) & 1;
197 }
198
199 /* Returns the pipeline name to which 'stage' belongs. */
200 static const char *
201 ovn_stage_get_pipeline_name(enum ovn_stage stage)
202 {
203 return ovn_stage_get_pipeline(stage) == P_IN ? "ingress" : "egress";
204 }
205
206 /* Returns the table to which 'stage' belongs. */
207 static uint8_t
208 ovn_stage_get_table(enum ovn_stage stage)
209 {
210 return stage & 0xff;
211 }
212
213 /* Returns a string name for 'stage'. */
214 static const char *
215 ovn_stage_to_str(enum ovn_stage stage)
216 {
217 switch (stage) {
218 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
219 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
220 PIPELINE_STAGES
221 #undef PIPELINE_STAGE
222 default: return "<unknown>";
223 }
224 }
225
226 /* Returns the type of the datapath to which a flow with the given 'stage' may
227 * be added. */
228 static enum ovn_datapath_type
229 ovn_stage_to_datapath_type(enum ovn_stage stage)
230 {
231 switch (stage) {
232 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
233 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
234 PIPELINE_STAGES
235 #undef PIPELINE_STAGE
236 default: OVS_NOT_REACHED();
237 }
238 }
239 \f
/* Prints the command-line help text to stdout: the program name, the
 * database and control-socket options together with the default database
 * locations, and then whatever daemon_usage(), vlog_usage() and
 * stream_usage() print. */
static void
usage(void)
{
    /* The four %s conversions are, in order: program name (twice), default
     * northbound database, default southbound database. */
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
--ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
(default: %s)\n\
--ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
(default: %s)\n\
--unixctl=SOCKET override default control socket name\n\
-h, --help display this help message\n\
-o, --options list available options\n\
-V, --version display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
261 \f
/* One element of a set of in-use tunnel IDs.  The set is a "struct hmap"
 * whose nodes are hashed with hash_int(tnlid, 0). */
struct tnlid_node {
    struct hmap_node hmap_node; /* In the set of tunnel IDs. */
    uint32_t tnlid;             /* The tunnel ID itself. */
};
266
267 static void
268 destroy_tnlids(struct hmap *tnlids)
269 {
270 struct tnlid_node *node;
271 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
272 free(node);
273 }
274 hmap_destroy(tnlids);
275 }
276
277 static void
278 add_tnlid(struct hmap *set, uint32_t tnlid)
279 {
280 struct tnlid_node *node = xmalloc(sizeof *node);
281 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
282 node->tnlid = tnlid;
283 }
284
285 static bool
286 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
287 {
288 const struct tnlid_node *node;
289 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
290 if (node->tnlid == tnlid) {
291 return true;
292 }
293 }
294 return false;
295 }
296
/* Returns the tunnel ID that follows 'tnlid' in the cyclic range 1...'max':
 * 'tnlid' + 1 if that is still within range, otherwise 1.
 *
 * The result is never 0, which callers reserve as a "no ID" value.  The
 * comparison is written as 'tnlid < max' rather than 'tnlid + 1 <= max' so
 * that tnlid == UINT32_MAX cannot wrap around to 0. */
static uint32_t
next_tnlid(uint32_t tnlid, uint32_t max)
{
    return tnlid < max ? tnlid + 1 : 1;
}
302
303 static uint32_t
304 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
305 uint32_t *hint)
306 {
307 for (uint32_t tnlid = next_tnlid(*hint, max); tnlid != *hint;
308 tnlid = next_tnlid(tnlid, max)) {
309 if (!tnlid_in_use(set, tnlid)) {
310 add_tnlid(set, tnlid);
311 *hint = tnlid;
312 return tnlid;
313 }
314 }
315
316 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
317 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
318 return 0;
319 }
320 \f
/* One (chassis, queue ID) pair in a set of in-use qdisc queue IDs.  Nodes
 * are hashed with hash_chassis_queue(). */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* In the set, keyed on both fields below. */
    uint32_t queue_id;          /* Queue ID in use on the chassis. */
    struct uuid chassis_uuid;   /* UUID of the chassis row. */
};
326
/* Computes the hash under which the pair ('chassis_uuid', 'queue_id') is
 * stored in a set of chassis queues. */
static uint32_t
hash_chassis_queue(const struct uuid *chassis_uuid, uint32_t queue_id)
{
    uint32_t uuid_part = uuid_hash(chassis_uuid);

    return hash_2words(uuid_part, queue_id);
}
332
333 static void
334 destroy_chassis_queues(struct hmap *set)
335 {
336 struct ovn_chassis_qdisc_queues *node;
337 HMAP_FOR_EACH_POP (node, key_node, set) {
338 free(node);
339 }
340 hmap_destroy(set);
341 }
342
343 static void
344 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
345 uint32_t queue_id)
346 {
347 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
348 node->queue_id = queue_id;
349 node->chassis_uuid = *chassis_uuid;
350 hmap_insert(set, &node->key_node,
351 hash_chassis_queue(chassis_uuid, queue_id));
352 }
353
354 static bool
355 chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
356 uint32_t queue_id)
357 {
358 const struct ovn_chassis_qdisc_queues *node;
359 HMAP_FOR_EACH_WITH_HASH (node, key_node,
360 hash_chassis_queue(chassis_uuid, queue_id), set) {
361 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
362 && node->queue_id == queue_id) {
363 return true;
364 }
365 }
366 return false;
367 }
368
369 static uint32_t
370 allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
371 {
372 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
373 queue_id <= QDISC_MAX_QUEUE_ID;
374 queue_id++) {
375 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
376 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
377 return queue_id;
378 }
379 }
380
381 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
382 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
383 return 0;
384 }
385
386 static void
387 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
388 uint32_t queue_id)
389 {
390 const struct uuid *chassis_uuid = &chassis->header_.uuid;
391 struct ovn_chassis_qdisc_queues *node;
392 HMAP_FOR_EACH_WITH_HASH (node, key_node,
393 hash_chassis_queue(chassis_uuid, queue_id), set) {
394 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
395 && node->queue_id == queue_id) {
396 hmap_remove(set, &node->key_node);
397 free(node);
398 break;
399 }
400 }
401 }
402
/* Returns true if 'opts' contains a "qos_max_rate" or a "qos_burst" key. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    if (smap_get(opts, "qos_max_rate")) {
        return true;
    }
    return smap_get(opts, "qos_burst") != NULL;
}
409 \f
410
/* Per-logical-switch IP address management state, filled in by
 * init_ipam_info_for_datapath() from the switch's other_config. */
struct ipam_info {
    uint32_t start_ipv4;        /* First managed address (subnet + 1), in
                                 * host byte order. */
    size_t total_ipv4s;         /* Number of bits in 'allocated_ipv4s'. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s; bit N
                                     * stands for start_ipv4 + N.  NULL when
                                     * no valid subnet is configured. */
    bool ipv6_prefix_set;       /* True if 'ipv6_prefix' parsed OK. */
    struct in6_addr ipv6_prefix; /* Parsed other_config:ipv6_prefix. */
};
418
/* A logical datapath (logical switch or logical router) as seen by
 * ovn-northd, tying together its northbound row and its southbound
 * Datapath_Binding row.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch or sb->external_ids:logical-router. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* Freed by ovn_datapath_destroy(). */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Set of "struct tnlid_node"s. */
    uint32_t port_key_hint;     /* Hint for ovn_port_allocate_key(). */

    bool has_unknown;

    /* IPAM data. */
    struct ipam_info ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
    struct ovn_port *localnet_port;

    /* Port groups related to the datapath, used only when nbs is NOT NULL. */
    struct hmap nb_pgs;
};
456
/* One entry of the global 'macam' table.  Entries are hashed with
 * hash_uint64() of the MAC address converted to a 64-bit integer. */
struct macam_node {
    struct hmap_node hmap_node; /* In 'macam'. */
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};
461
462 static void
463 cleanup_macam(struct hmap *macam_)
464 {
465 struct macam_node *node;
466 HMAP_FOR_EACH_POP (node, hmap_node, macam_) {
467 free(node);
468 }
469 }
470
471 static struct ovn_datapath *
472 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
473 const struct nbrec_logical_switch *nbs,
474 const struct nbrec_logical_router *nbr,
475 const struct sbrec_datapath_binding *sb)
476 {
477 struct ovn_datapath *od = xzalloc(sizeof *od);
478 od->key = *key;
479 od->sb = sb;
480 od->nbs = nbs;
481 od->nbr = nbr;
482 hmap_init(&od->port_tnlids);
483 hmap_init(&od->nb_pgs);
484 od->port_key_hint = 0;
485 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
486 return od;
487 }
488
489 static void ovn_ls_port_group_destroy(struct hmap *nb_pgs);
490
491 static void
492 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
493 {
494 if (od) {
495 /* Don't remove od->list. It is used within build_datapaths() as a
496 * private list and once we've exited that function it is not safe to
497 * use it. */
498 hmap_remove(datapaths, &od->key_node);
499 destroy_tnlids(&od->port_tnlids);
500 bitmap_free(od->ipam_info.allocated_ipv4s);
501 free(od->router_ports);
502 ovn_ls_port_group_destroy(&od->nb_pgs);
503 free(od);
504 }
505 }
506
507 /* Returns 'od''s datapath type. */
508 static enum ovn_datapath_type
509 ovn_datapath_get_type(const struct ovn_datapath *od)
510 {
511 return od->nbs ? DP_SWITCH : DP_ROUTER;
512 }
513
514 static struct ovn_datapath *
515 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
516 {
517 struct ovn_datapath *od;
518
519 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
520 if (uuid_equals(uuid, &od->key)) {
521 return od;
522 }
523 }
524 return NULL;
525 }
526
527 static struct ovn_datapath *
528 ovn_datapath_from_sbrec(struct hmap *datapaths,
529 const struct sbrec_datapath_binding *sb)
530 {
531 struct uuid key;
532
533 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
534 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
535 return NULL;
536 }
537 return ovn_datapath_find(datapaths, &key);
538 }
539
540 static bool
541 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
542 {
543 return !lrouter->enabled || *lrouter->enabled;
544 }
545
546 static void
547 init_ipam_info_for_datapath(struct ovn_datapath *od)
548 {
549 if (!od->nbs) {
550 return;
551 }
552
553 const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
554 const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");
555
556 if (ipv6_prefix) {
557 od->ipam_info.ipv6_prefix_set = ipv6_parse(
558 ipv6_prefix, &od->ipam_info.ipv6_prefix);
559 }
560
561 if (!subnet_str) {
562 return;
563 }
564
565 ovs_be32 subnet, mask;
566 char *error = ip_parse_masked(subnet_str, &subnet, &mask);
567 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
568 static struct vlog_rate_limit rl
569 = VLOG_RATE_LIMIT_INIT(5, 1);
570 VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
571 free(error);
572 return;
573 }
574
575 od->ipam_info.start_ipv4 = ntohl(subnet) + 1;
576 od->ipam_info.total_ipv4s = ~ntohl(mask);
577 od->ipam_info.allocated_ipv4s =
578 bitmap_allocate(od->ipam_info.total_ipv4s);
579
580 /* Mark first IP as taken */
581 bitmap_set1(od->ipam_info.allocated_ipv4s, 0);
582
583 /* Check if there are any reserver IPs (list) to be excluded from IPAM */
584 const char *exclude_ip_list = smap_get(&od->nbs->other_config,
585 "exclude_ips");
586 if (!exclude_ip_list) {
587 return;
588 }
589
590 struct lexer lexer;
591 lexer_init(&lexer, exclude_ip_list);
592 /* exclude_ip_list could be in the format -
593 * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
594 */
595 lexer_get(&lexer);
596 while (lexer.token.type != LEX_T_END) {
597 if (lexer.token.type != LEX_T_INTEGER) {
598 lexer_syntax_error(&lexer, "expecting address");
599 break;
600 }
601 uint32_t start = ntohl(lexer.token.value.ipv4);
602 lexer_get(&lexer);
603
604 uint32_t end = start + 1;
605 if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
606 if (lexer.token.type != LEX_T_INTEGER) {
607 lexer_syntax_error(&lexer, "expecting address range");
608 break;
609 }
610 end = ntohl(lexer.token.value.ipv4) + 1;
611 lexer_get(&lexer);
612 }
613
614 /* Clamp start...end to fit the subnet. */
615 start = MAX(od->ipam_info.start_ipv4, start);
616 end = MIN(od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s, end);
617 if (end > start) {
618 bitmap_set_multiple(od->ipam_info.allocated_ipv4s,
619 start - od->ipam_info.start_ipv4,
620 end - start, 1);
621 } else {
622 lexer_error(&lexer, "excluded addresses not in subnet");
623 }
624 }
625 if (lexer.error) {
626 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
627 VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
628 UUID_ARGS(&od->key), lexer.error);
629 }
630 lexer_destroy(&lexer);
631 }
632
633 static void
634 ovn_datapath_update_external_ids(struct ovn_datapath *od)
635 {
636 /* Get the logical-switch or logical-router UUID to set in
637 * external-ids. */
638 char uuid_s[UUID_LEN + 1];
639 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
640 const char *key = od->nbs ? "logical-switch" : "logical-router";
641
642 /* Get names to set in external-ids. */
643 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
644 const char *name2 = (od->nbs
645 ? smap_get(&od->nbs->external_ids,
646 "neutron:network_name")
647 : smap_get(&od->nbr->external_ids,
648 "neutron:router_name"));
649
650 /* Set external-ids. */
651 struct smap ids = SMAP_INITIALIZER(&ids);
652 smap_add(&ids, key, uuid_s);
653 smap_add(&ids, "name", name);
654 if (name2 && name2[0]) {
655 smap_add(&ids, "name2", name2);
656 }
657 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
658 smap_destroy(&ids);
659 }
660
661 static void
662 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
663 struct ovs_list *sb_only, struct ovs_list *nb_only,
664 struct ovs_list *both)
665 {
666 hmap_init(datapaths);
667 ovs_list_init(sb_only);
668 ovs_list_init(nb_only);
669 ovs_list_init(both);
670
671 const struct sbrec_datapath_binding *sb, *sb_next;
672 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
673 struct uuid key;
674 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
675 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
676 ovsdb_idl_txn_add_comment(
677 ctx->ovnsb_txn,
678 "deleting Datapath_Binding "UUID_FMT" that lacks "
679 "external-ids:logical-switch and "
680 "external-ids:logical-router",
681 UUID_ARGS(&sb->header_.uuid));
682 sbrec_datapath_binding_delete(sb);
683 continue;
684 }
685
686 if (ovn_datapath_find(datapaths, &key)) {
687 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
688 VLOG_INFO_RL(
689 &rl, "deleting Datapath_Binding "UUID_FMT" with "
690 "duplicate external-ids:logical-switch/router "UUID_FMT,
691 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
692 sbrec_datapath_binding_delete(sb);
693 continue;
694 }
695
696 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
697 NULL, NULL, sb);
698 ovs_list_push_back(sb_only, &od->list);
699 }
700
701 const struct nbrec_logical_switch *nbs;
702 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
703 struct ovn_datapath *od = ovn_datapath_find(datapaths,
704 &nbs->header_.uuid);
705 if (od) {
706 od->nbs = nbs;
707 ovs_list_remove(&od->list);
708 ovs_list_push_back(both, &od->list);
709 ovn_datapath_update_external_ids(od);
710 } else {
711 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
712 nbs, NULL, NULL);
713 ovs_list_push_back(nb_only, &od->list);
714 }
715
716 init_ipam_info_for_datapath(od);
717 }
718
719 const struct nbrec_logical_router *nbr;
720 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
721 if (!lrouter_is_enabled(nbr)) {
722 continue;
723 }
724
725 struct ovn_datapath *od = ovn_datapath_find(datapaths,
726 &nbr->header_.uuid);
727 if (od) {
728 if (!od->nbs) {
729 od->nbr = nbr;
730 ovs_list_remove(&od->list);
731 ovs_list_push_back(both, &od->list);
732 ovn_datapath_update_external_ids(od);
733 } else {
734 /* Can't happen! */
735 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
736 VLOG_WARN_RL(&rl,
737 "duplicate UUID "UUID_FMT" in OVN_Northbound",
738 UUID_ARGS(&nbr->header_.uuid));
739 continue;
740 }
741 } else {
742 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
743 NULL, nbr, NULL);
744 ovs_list_push_back(nb_only, &od->list);
745 }
746 }
747 }
748
/* Allocates an unused datapath tunnel key in the range 1...2**24 - 1 and
 * records it in 'dp_tnlids'.  Returns 0 if none is available.  The search
 * hint persists across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
755
756 /* Updates the southbound Datapath_Binding table so that it contains the
757 * logical switches and routers specified by the northbound database.
758 *
759 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
760 * switch and router. */
761 static void
762 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
763 {
764 struct ovs_list sb_only, nb_only, both;
765
766 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
767
768 if (!ovs_list_is_empty(&nb_only)) {
769 /* First index the in-use datapath tunnel IDs. */
770 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
771 struct ovn_datapath *od;
772 LIST_FOR_EACH (od, list, &both) {
773 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
774 }
775
776 /* Add southbound record for each unmatched northbound record. */
777 LIST_FOR_EACH (od, list, &nb_only) {
778 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
779 if (!tunnel_key) {
780 break;
781 }
782
783 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
784 ovn_datapath_update_external_ids(od);
785 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
786 }
787 destroy_tnlids(&dp_tnlids);
788 }
789
790 /* Delete southbound records without northbound matches. */
791 struct ovn_datapath *od, *next;
792 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
793 ovs_list_remove(&od->list);
794 sbrec_datapath_binding_delete(od->sb);
795 ovn_datapath_destroy(datapaths, od);
796 }
797 }
798 \f
/* A logical port (logical switch port or logical router port) as seen by
 * ovn-northd, tying together its northbound row and its southbound
 * Port_Binding row.  Ports are indexed by name in an hmap. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived;               /* Indicates whether this is an additional port
                                 * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S as its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;

    struct ovs_list list;       /* In list of similar records. */
};
835
836 static struct ovn_port *
837 ovn_port_create(struct hmap *ports, const char *key,
838 const struct nbrec_logical_switch_port *nbsp,
839 const struct nbrec_logical_router_port *nbrp,
840 const struct sbrec_port_binding *sb)
841 {
842 struct ovn_port *op = xzalloc(sizeof *op);
843
844 struct ds json_key = DS_EMPTY_INITIALIZER;
845 json_string_escape(key, &json_key);
846 op->json_key = ds_steal_cstr(&json_key);
847
848 op->key = xstrdup(key);
849 op->sb = sb;
850 op->nbsp = nbsp;
851 op->nbrp = nbrp;
852 op->derived = false;
853 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
854 return op;
855 }
856
857 static void
858 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
859 {
860 if (port) {
861 /* Don't remove port->list. It is used within build_ports() as a
862 * private list and once we've exited that function it is not safe to
863 * use it. */
864 hmap_remove(ports, &port->key_node);
865
866 for (int i = 0; i < port->n_lsp_addrs; i++) {
867 destroy_lport_addresses(&port->lsp_addrs[i]);
868 }
869 free(port->lsp_addrs);
870
871 for (int i = 0; i < port->n_ps_addrs; i++) {
872 destroy_lport_addresses(&port->ps_addrs[i]);
873 }
874 free(port->ps_addrs);
875
876 destroy_lport_addresses(&port->lrp_networks);
877 free(port->json_key);
878 free(port->key);
879 free(port);
880 }
881 }
882
883 static struct ovn_port *
884 ovn_port_find(struct hmap *ports, const char *name)
885 {
886 struct ovn_port *op;
887
888 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
889 if (!strcmp(op->key, name)) {
890 return op;
891 }
892 }
893 return NULL;
894 }
895
896 static uint32_t
897 ovn_port_allocate_key(struct ovn_datapath *od)
898 {
899 return allocate_tnlid(&od->port_tnlids, "port",
900 (1u << 15) - 1, &od->port_key_hint);
901 }
902
/* Returns a newly allocated string made of 'port_name' prefixed with "cr-".
 * The caller owns the result and must free() it. */
static char *
chassis_redirect_name(const char *port_name)
{
    char *redirect_name = xasprintf("cr-%s", port_name);

    return redirect_name;
}
908
909 static bool
910 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
911 {
912 struct macam_node *macam_node;
913 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
914 &macam) {
915 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
916 if (warn) {
917 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
918 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
919 ETH_ADDR_ARGS(macam_node->mac_addr));
920 }
921 return true;
922 }
923 }
924 return false;
925 }
926
927 static void
928 ipam_insert_mac(struct eth_addr *ea, bool check)
929 {
930 if (!ea) {
931 return;
932 }
933
934 uint64_t mac64 = eth_addr_to_uint64(*ea);
935 uint64_t prefix;
936
937 if (!eth_addr_is_zero(mac_prefix)) {
938 prefix = eth_addr_to_uint64(mac_prefix);
939 } else {
940 prefix = MAC_ADDR_PREFIX;
941 }
942 /* If the new MAC was not assigned by this address management system or
943 * check is true and the new MAC is a duplicate, do not insert it into the
944 * macam hmap. */
945 if (((mac64 ^ prefix) >> 24)
946 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
947 return;
948 }
949
950 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
951 new_macam_node->mac_addr = *ea;
952 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
953 }
954
955 static void
956 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
957 {
958 if (!od || !od->ipam_info.allocated_ipv4s) {
959 return;
960 }
961
962 if (ip >= od->ipam_info.start_ipv4 &&
963 ip < (od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s)) {
964 if (bitmap_is_set(od->ipam_info.allocated_ipv4s,
965 ip - od->ipam_info.start_ipv4)) {
966 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
967 VLOG_WARN_RL(&rl, "Duplicate IP set on switch %s: "IP_FMT,
968 od->nbs->name, IP_ARGS(htonl(ip)));
969 }
970 bitmap_set1(od->ipam_info.allocated_ipv4s,
971 ip - od->ipam_info.start_ipv4);
972 }
973 }
974
975 static void
976 ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
977 char *address)
978 {
979 if (!od || !op || !address || !strcmp(address, "unknown")
980 || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
981 return;
982 }
983
984 struct lport_addresses laddrs;
985 if (!extract_lsp_addresses(address, &laddrs)) {
986 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
987 VLOG_WARN_RL(&rl, "Extract addresses failed.");
988 return;
989 }
990 ipam_insert_mac(&laddrs.ea, true);
991
992 /* IP is only added to IPAM if the switch's subnet option
993 * is set, whereas MAC is always added to MACAM. */
994 if (!od->ipam_info.allocated_ipv4s) {
995 destroy_lport_addresses(&laddrs);
996 return;
997 }
998
999 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
1000 uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
1001 ipam_insert_ip(od, ip);
1002 }
1003
1004 destroy_lport_addresses(&laddrs);
1005 }
1006
1007 static void
1008 ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
1009 {
1010 if (!od || !op) {
1011 return;
1012 }
1013
1014 if (op->nbsp) {
1015 /* Add all the port's addresses to address data structures. */
1016 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
1017 ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
1018 }
1019 } else if (op->nbrp) {
1020 struct lport_addresses lrp_networks;
1021 if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
1022 static struct vlog_rate_limit rl
1023 = VLOG_RATE_LIMIT_INIT(1, 1);
1024 VLOG_WARN_RL(&rl, "Extract addresses failed.");
1025 return;
1026 }
1027 ipam_insert_mac(&lrp_networks.ea, true);
1028
1029 if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
1030 || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
1031 destroy_lport_addresses(&lrp_networks);
1032 return;
1033 }
1034
1035 for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
1036 uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
1037 ipam_insert_ip(op->peer->od, ip);
1038 }
1039
1040 destroy_lport_addresses(&lrp_networks);
1041 }
1042 }
1043
1044 static uint64_t
1045 ipam_get_unused_mac(ovs_be32 ip)
1046 {
1047 uint32_t mac_addr_suffix, i, base_addr = ntohl(ip) & MAC_ADDR_SPACE;
1048 struct eth_addr mac;
1049 uint64_t mac64;
1050
1051 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1052 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1053 mac_addr_suffix = ((base_addr + i) % (MAC_ADDR_SPACE - 1)) + 1;
1054 if (!eth_addr_is_zero(mac_prefix)) {
1055 mac64 = eth_addr_to_uint64(mac_prefix) | mac_addr_suffix;
1056 } else {
1057 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1058 }
1059 eth_addr_from_uint64(mac64, &mac);
1060 if (!ipam_is_duplicate_mac(&mac, mac64, true)) {
1061 break;
1062 }
1063 }
1064
1065 if (i == MAC_ADDR_SPACE) {
1066 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1067 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1068 mac64 = 0;
1069 }
1070
1071 return mac64;
1072 }
1073
1074 static uint32_t
1075 ipam_get_unused_ip(struct ovn_datapath *od)
1076 {
1077 if (!od || !od->ipam_info.allocated_ipv4s) {
1078 return 0;
1079 }
1080
1081 size_t new_ip_index = bitmap_scan(od->ipam_info.allocated_ipv4s, 0, 0,
1082 od->ipam_info.total_ipv4s - 1);
1083 if (new_ip_index == od->ipam_info.total_ipv4s - 1) {
1084 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1085 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
1086 return 0;
1087 }
1088
1089 return od->ipam_info.start_ipv4 + new_ip_index;
1090 }
1091
/* What has to happen to one component (MAC, IPv4 or IPv6) of a port's
 * dynamic address. */
enum dynamic_update_type {
    NONE,    /* Keep the component as it is. */
    REMOVE,  /* Drop the component: it is no longer dynamic. */
    STATIC,  /* Use the statically configured value (MAC only). */
    DYNAMIC, /* Allocate a fresh dynamic value. */
};
1098
1099 struct dynamic_address_update {
1100 struct ovs_list node; /* In build_ipam()'s list of updates. */
1101
1102 struct ovn_datapath *od;
1103 struct ovn_port *op;
1104
1105 struct lport_addresses current_addresses;
1106 struct eth_addr static_mac;
1107 enum dynamic_update_type mac;
1108 enum dynamic_update_type ipv4;
1109 enum dynamic_update_type ipv6;
1110 };
1111
1112 static enum dynamic_update_type
1113 dynamic_mac_changed(const char *lsp_addresses,
1114 struct dynamic_address_update *update)
1115 {
1116 struct eth_addr ea;
1117
1118 if (ovs_scan(lsp_addresses, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1119 if (eth_addr_equals(ea, update->current_addresses.ea)) {
1120 return NONE;
1121 } else {
1122 /* MAC is still static, but it has changed */
1123 update->static_mac = ea;
1124 return STATIC;
1125 }
1126 }
1127
1128 uint64_t mac64 = eth_addr_to_uint64(update->current_addresses.ea);
1129 uint64_t prefix;
1130
1131 if (!eth_addr_is_zero(mac_prefix)) {
1132 prefix = eth_addr_to_uint64(mac_prefix);
1133 } else {
1134 prefix = MAC_ADDR_PREFIX;
1135 }
1136
1137 if ((mac64 ^ prefix) >> 24) {
1138 return DYNAMIC;
1139 } else {
1140 return NONE;
1141 }
1142 }
1143
1144 static enum dynamic_update_type
1145 dynamic_ip4_changed(struct dynamic_address_update *update)
1146 {
1147 const struct ipam_info *ipam = &update->op->od->ipam_info;
1148 const struct lport_addresses *cur_addresses = &update->current_addresses;
1149 bool dynamic_ip4 = ipam->allocated_ipv4s != NULL;
1150
1151 if (!dynamic_ip4) {
1152 if (update->current_addresses.n_ipv4_addrs) {
1153 return REMOVE;
1154 } else {
1155 return NONE;
1156 }
1157 }
1158
1159 if (!cur_addresses->n_ipv4_addrs) {
1160 /* IPv4 was previously static but now is dynamic */
1161 return DYNAMIC;
1162 }
1163
1164 uint32_t ip4 = ntohl(cur_addresses->ipv4_addrs[0].addr);
1165 if (ip4 < ipam->start_ipv4) {
1166 return DYNAMIC;
1167 }
1168
1169 uint32_t index = ip4 - ipam->start_ipv4;
1170 if (index > ipam->total_ipv4s ||
1171 bitmap_is_set(ipam->allocated_ipv4s, index)) {
1172 /* Previously assigned dynamic IPv4 address can no longer be used.
1173 * It's either outside the subnet, conflicts with an excluded IP,
1174 * or conflicts with a statically-assigned address on the switch
1175 */
1176 return DYNAMIC;
1177 } else {
1178 return NONE;
1179 }
1180 }
1181
1182 static enum dynamic_update_type
1183 dynamic_ip6_changed(struct dynamic_address_update *update)
1184 {
1185 bool dynamic_ip6 = update->op->od->ipam_info.ipv6_prefix_set;
1186
1187 if (!dynamic_ip6) {
1188 if (update->current_addresses.n_ipv6_addrs) {
1189 /* IPv6 was dynamic but now is not */
1190 return REMOVE;
1191 } else {
1192 /* IPv6 has never been dynamic */
1193 return NONE;
1194 }
1195 }
1196
1197 if (update->mac != NONE) {
1198 /* IPv6 address is based on MAC, so if MAC has been updated,
1199 * then we have to update IPv6 address too.
1200 */
1201 return DYNAMIC;
1202 }
1203
1204 if (!update->current_addresses.n_ipv6_addrs) {
1205 /* IPv6 was previously static but now is dynamic */
1206 return DYNAMIC;
1207 }
1208
1209 struct in6_addr masked = ipv6_addr_bitand(
1210 &update->current_addresses.ipv6_addrs[0].addr,
1211 &update->op->od->ipam_info.ipv6_prefix);
1212 if (!IN6_ARE_ADDR_EQUAL(&masked, &update->op->od->ipam_info.ipv6_prefix)) {
1213 return DYNAMIC;
1214 }
1215
1216 return NONE;
1217 }
1218
1219 /* Check previously assigned dynamic addresses for validity. This will
1220 * check if the assigned addresses need to change.
1221 *
1222 * Returns true if any changes to dynamic addresses are required
1223 */
1224 static bool
1225 dynamic_addresses_check_for_updates(const char *lsp_addrs,
1226 struct dynamic_address_update *update)
1227 {
1228 update->mac = dynamic_mac_changed(lsp_addrs, update);
1229 update->ipv4 = dynamic_ip4_changed(update);
1230 update->ipv6 = dynamic_ip6_changed(update);
1231 if (update->mac == NONE &&
1232 update->ipv4 == NONE &&
1233 update->ipv6 == NONE) {
1234 return false;
1235 } else {
1236 return true;
1237 }
1238 }
1239
1240 /* For addresses that do not need to be updated, go ahead and insert them
1241 * into IPAM. This way, their addresses will be claimed and cannot be assigned
1242 * elsewhere later.
1243 */
1244 static void
1245 update_unchanged_dynamic_addresses(struct dynamic_address_update *update)
1246 {
1247 if (update->mac == NONE) {
1248 ipam_insert_mac(&update->current_addresses.ea, false);
1249 }
1250 if (update->ipv4 == NONE && update->current_addresses.n_ipv4_addrs) {
1251 ipam_insert_ip(update->op->od,
1252 ntohl(update->current_addresses.ipv4_addrs[0].addr));
1253 }
1254 }
1255
1256 static void
1257 set_lsp_dynamic_addresses(const char *dynamic_addresses, struct ovn_port *op)
1258 {
1259 extract_lsp_addresses(dynamic_addresses, &op->lsp_addrs[op->n_lsp_addrs]);
1260 op->n_lsp_addrs++;
1261 }
1262
1263 /* Determines which components (MAC, IPv4, and IPv6) of dynamic
1264 * addresses need to be assigned. This is used exclusively for
1265 * ports that do not have dynamic addresses already assigned.
1266 */
1267 static void
1268 set_dynamic_updates(const char *addrspec,
1269 struct dynamic_address_update *update)
1270 {
1271 struct eth_addr mac;
1272 int n = 0;
1273 if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
1274 ETH_ADDR_SCAN_ARGS(mac), &n)
1275 && addrspec[n] == '\0') {
1276 update->mac = STATIC;
1277 update->static_mac = mac;
1278 } else {
1279 update->mac = DYNAMIC;
1280 }
1281 if (update->op->od->ipam_info.allocated_ipv4s) {
1282 update->ipv4 = DYNAMIC;
1283 } else {
1284 update->ipv4 = NONE;
1285 }
1286 if (update->op->od->ipam_info.ipv6_prefix_set) {
1287 update->ipv6 = DYNAMIC;
1288 } else {
1289 update->ipv6 = NONE;
1290 }
1291 }
1292
1293 static void
1294 update_dynamic_addresses(struct dynamic_address_update *update)
1295 {
1296 ovs_be32 ip4 = 0;
1297 switch (update->ipv4) {
1298 case NONE:
1299 if (update->current_addresses.n_ipv4_addrs) {
1300 ip4 = update->current_addresses.ipv4_addrs[0].addr;
1301 }
1302 break;
1303 case REMOVE:
1304 break;
1305 case STATIC:
1306 OVS_NOT_REACHED();
1307 case DYNAMIC:
1308 ip4 = htonl(ipam_get_unused_ip(update->od));
1309 }
1310
1311 struct eth_addr mac;
1312 switch (update->mac) {
1313 case NONE:
1314 mac = update->current_addresses.ea;
1315 break;
1316 case REMOVE:
1317 OVS_NOT_REACHED();
1318 case STATIC:
1319 mac = update->static_mac;
1320 break;
1321 case DYNAMIC:
1322 eth_addr_from_uint64(ipam_get_unused_mac(ip4), &mac);
1323 break;
1324 }
1325
1326 struct in6_addr ip6 = in6addr_any;
1327 switch (update->ipv6) {
1328 case NONE:
1329 if (update->current_addresses.n_ipv6_addrs) {
1330 ip6 = update->current_addresses.ipv6_addrs[0].addr;
1331 }
1332 break;
1333 case REMOVE:
1334 break;
1335 case STATIC:
1336 OVS_NOT_REACHED();
1337 case DYNAMIC:
1338 in6_generate_eui64(mac, &update->od->ipam_info.ipv6_prefix, &ip6);
1339 break;
1340 }
1341
1342 struct ds new_addr = DS_EMPTY_INITIALIZER;
1343 ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1344 if (ip4) {
1345 ipam_insert_ip(update->od, ntohl(ip4));
1346 ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(ip4));
1347 }
1348 if (!IN6_ARE_ADDR_EQUAL(&ip6, &in6addr_any)) {
1349 char ip6_s[INET6_ADDRSTRLEN + 1];
1350 ipv6_string_mapped(ip6_s, &ip6);
1351 ds_put_format(&new_addr, " %s", ip6_s);
1352 }
1353 nbrec_logical_switch_port_set_dynamic_addresses(update->op->nbsp,
1354 ds_cstr(&new_addr));
1355 set_lsp_dynamic_addresses(ds_cstr(&new_addr), update->op);
1356 ds_destroy(&new_addr);
1357 }
1358
1359 static void
1360 build_ipam(struct hmap *datapaths, struct hmap *ports)
1361 {
1362 /* IPAM generally stands for IP address management. In non-virtualized
1363 * world, MAC addresses come with the hardware. But, with virtualized
1364 * workloads, they need to be assigned and managed. This function
1365 * does both IP address management (ipam) and MAC address management
1366 * (macam). */
1367
1368 /* If the switch's other_config:subnet is set, allocate new addresses for
1369 * ports that have the "dynamic" keyword in their addresses column. */
1370 struct ovn_datapath *od;
1371 struct ovs_list updates;
1372
1373 ovs_list_init(&updates);
1374 HMAP_FOR_EACH (od, key_node, datapaths) {
1375 if (!od->nbs) {
1376 continue;
1377 }
1378
1379 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1380 const struct nbrec_logical_switch_port *nbsp = od->nbs->ports[i];
1381
1382 if (!od->ipam_info.allocated_ipv4s &&
1383 !od->ipam_info.ipv6_prefix_set) {
1384 if (nbsp->dynamic_addresses) {
1385 nbrec_logical_switch_port_set_dynamic_addresses(nbsp,
1386 NULL);
1387 }
1388 continue;
1389 }
1390
1391 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1392 if (!op || op->nbsp != nbsp || op->peer) {
1393 /* Do not allocate addresses for logical switch ports that
1394 * have a peer. */
1395 continue;
1396 }
1397
1398 int num_dynamic_addresses = 0;
1399 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1400 if (!is_dynamic_lsp_address(nbsp->addresses[j])) {
1401 continue;
1402 }
1403 if (num_dynamic_addresses) {
1404 static struct vlog_rate_limit rl
1405 = VLOG_RATE_LIMIT_INIT(1, 1);
1406 VLOG_WARN_RL(&rl, "More than one dynamic address "
1407 "configured for logical switch port '%s'",
1408 nbsp->name);
1409 continue;
1410 }
1411 num_dynamic_addresses++;
1412 struct dynamic_address_update *update
1413 = xzalloc(sizeof *update);
1414 update->op = op;
1415 update->od = od;
1416 if (nbsp->dynamic_addresses) {
1417 bool any_changed;
1418 extract_lsp_addresses(nbsp->dynamic_addresses,
1419 &update->current_addresses);
1420 any_changed = dynamic_addresses_check_for_updates(
1421 nbsp->addresses[j], update);
1422 update_unchanged_dynamic_addresses(update);
1423 if (any_changed) {
1424 ovs_list_push_back(&updates, &update->node);
1425 } else {
1426 /* No changes to dynamic addresses */
1427 set_lsp_dynamic_addresses(nbsp->dynamic_addresses, op);
1428 destroy_lport_addresses(&update->current_addresses);
1429 free(update);
1430 }
1431 } else {
1432 set_dynamic_updates(nbsp->addresses[j], update);
1433 ovs_list_push_back(&updates, &update->node);
1434 }
1435 }
1436
1437 if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
1438 nbrec_logical_switch_port_set_dynamic_addresses(nbsp, NULL);
1439 }
1440 }
1441
1442 }
1443
1444 /* After retaining all unchanged dynamic addresses, now assign
1445 * new ones.
1446 */
1447 struct dynamic_address_update *update;
1448 LIST_FOR_EACH_POP (update, node, &updates) {
1449 update_dynamic_addresses(update);
1450 destroy_lport_addresses(&update->current_addresses);
1451 free(update);
1452 }
1453 }
1454 \f
1455 /* Tag allocation for nested containers.
1456 *
1457 * For a logical switch port with 'parent_name' and a request to allocate tags,
1458 * keeps a track of all allocated tags. */
1459 struct tag_alloc_node {
1460 struct hmap_node hmap_node;
1461 char *parent_name;
1462 unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
1463 };
1464
1465 static void
1466 tag_alloc_destroy(struct hmap *tag_alloc_table)
1467 {
1468 struct tag_alloc_node *node;
1469 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1470 bitmap_free(node->allocated_tags);
1471 free(node->parent_name);
1472 free(node);
1473 }
1474 hmap_destroy(tag_alloc_table);
1475 }
1476
1477 static struct tag_alloc_node *
1478 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1479 {
1480 /* If a node for the 'parent_name' exists, return it. */
1481 struct tag_alloc_node *tag_alloc_node;
1482 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1483 hash_string(parent_name, 0),
1484 tag_alloc_table) {
1485 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1486 return tag_alloc_node;
1487 }
1488 }
1489
1490 /* Create a new node. */
1491 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1492 tag_alloc_node->parent_name = xstrdup(parent_name);
1493 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1494 /* Tag 0 is invalid for nested containers. */
1495 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1496 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1497 hash_string(parent_name, 0));
1498
1499 return tag_alloc_node;
1500 }
1501
1502 static void
1503 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1504 const struct nbrec_logical_switch_port *nbsp)
1505 {
1506 /* Add the tags of already existing nested containers. If there is no
1507 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1508 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1509 return;
1510 }
1511
1512 struct tag_alloc_node *tag_alloc_node;
1513 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1514 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1515 }
1516
1517 static void
1518 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1519 const struct nbrec_logical_switch_port *nbsp)
1520 {
1521 if (!nbsp->tag_request) {
1522 return;
1523 }
1524
1525 if (nbsp->parent_name && nbsp->parent_name[0]
1526 && *nbsp->tag_request == 0) {
1527 /* For nested containers that need allocation, do the allocation. */
1528
1529 if (nbsp->tag) {
1530 /* This has already been allocated. */
1531 return;
1532 }
1533
1534 struct tag_alloc_node *tag_alloc_node;
1535 int64_t tag;
1536 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1537 nbsp->parent_name);
1538 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1539 if (tag == MAX_OVN_TAGS) {
1540 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1541 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1542 "parent %s", nbsp->parent_name);
1543 return;
1544 }
1545 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1546 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1547 } else if (*nbsp->tag_request != 0) {
1548 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1549 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1550 }
1551 }
1552 \f
1553
1554 static void
1555 join_logical_ports(struct northd_context *ctx,
1556 struct hmap *datapaths, struct hmap *ports,
1557 struct hmap *chassis_qdisc_queues,
1558 struct hmap *tag_alloc_table, struct ovs_list *sb_only,
1559 struct ovs_list *nb_only, struct ovs_list *both)
1560 {
1561 hmap_init(ports);
1562 ovs_list_init(sb_only);
1563 ovs_list_init(nb_only);
1564 ovs_list_init(both);
1565
1566 const struct sbrec_port_binding *sb;
1567 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
1568 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
1569 NULL, NULL, sb);
1570 ovs_list_push_back(sb_only, &op->list);
1571 }
1572
1573 struct ovn_datapath *od;
1574 HMAP_FOR_EACH (od, key_node, datapaths) {
1575 if (od->nbs) {
1576 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1577 const struct nbrec_logical_switch_port *nbsp
1578 = od->nbs->ports[i];
1579 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1580 if (op) {
1581 if (op->nbsp || op->nbrp) {
1582 static struct vlog_rate_limit rl
1583 = VLOG_RATE_LIMIT_INIT(5, 1);
1584 VLOG_WARN_RL(&rl, "duplicate logical port %s",
1585 nbsp->name);
1586 continue;
1587 }
1588 op->nbsp = nbsp;
1589 ovs_list_remove(&op->list);
1590
1591 uint32_t queue_id = smap_get_int(&op->sb->options,
1592 "qdisc_queue_id", 0);
1593 if (queue_id && op->sb->chassis) {
1594 add_chassis_queue(
1595 chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
1596 queue_id);
1597 }
1598
1599 ovs_list_push_back(both, &op->list);
1600
1601 /* This port exists due to a SB binding, but should
1602 * not have been initialized fully. */
1603 ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
1604 } else {
1605 op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
1606 ovs_list_push_back(nb_only, &op->list);
1607 }
1608
1609 if (!strcmp(nbsp->type, "localnet")) {
1610 od->localnet_port = op;
1611 }
1612
1613 op->lsp_addrs
1614 = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
1615 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1616 if (!strcmp(nbsp->addresses[j], "unknown")
1617 || !strcmp(nbsp->addresses[j], "router")) {
1618 continue;
1619 }
1620 if (is_dynamic_lsp_address(nbsp->addresses[j])) {
1621 continue;
1622 } else if (!extract_lsp_addresses(nbsp->addresses[j],
1623 &op->lsp_addrs[op->n_lsp_addrs])) {
1624 static struct vlog_rate_limit rl
1625 = VLOG_RATE_LIMIT_INIT(1, 1);
1626 VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
1627 "switch port addresses. No MAC "
1628 "address found",
1629 op->nbsp->addresses[j]);
1630 continue;
1631 }
1632 op->n_lsp_addrs++;
1633 }
1634
1635 op->ps_addrs
1636 = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
1637 for (size_t j = 0; j < nbsp->n_port_security; j++) {
1638 if (!extract_lsp_addresses(nbsp->port_security[j],
1639 &op->ps_addrs[op->n_ps_addrs])) {
1640 static struct vlog_rate_limit rl
1641 = VLOG_RATE_LIMIT_INIT(1, 1);
1642 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
1643 "security. No MAC address found",
1644 op->nbsp->port_security[j]);
1645 continue;
1646 }
1647 op->n_ps_addrs++;
1648 }
1649
1650 op->od = od;
1651 ipam_add_port_addresses(od, op);
1652 tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
1653 }
1654 } else {
1655 for (size_t i = 0; i < od->nbr->n_ports; i++) {
1656 const struct nbrec_logical_router_port *nbrp
1657 = od->nbr->ports[i];
1658
1659 struct lport_addresses lrp_networks;
1660 if (!extract_lrp_networks(nbrp, &lrp_networks)) {
1661 static struct vlog_rate_limit rl
1662 = VLOG_RATE_LIMIT_INIT(5, 1);
1663 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
1664 continue;
1665 }
1666
1667 if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
1668 continue;
1669 }
1670
1671 struct ovn_port *op = ovn_port_find(ports, nbrp->name);
1672 if (op) {
1673 if (op->nbsp || op->nbrp) {
1674 static struct vlog_rate_limit rl
1675 = VLOG_RATE_LIMIT_INIT(5, 1);
1676 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
1677 nbrp->name);
1678 continue;
1679 }
1680 op->nbrp = nbrp;
1681 ovs_list_remove(&op->list);
1682 ovs_list_push_back(both, &op->list);
1683
1684 /* This port exists but should not have been
1685 * initialized fully. */
1686 ovs_assert(!op->lrp_networks.n_ipv4_addrs
1687 && !op->lrp_networks.n_ipv6_addrs);
1688 } else {
1689 op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
1690 ovs_list_push_back(nb_only, &op->list);
1691 }
1692
1693 op->lrp_networks = lrp_networks;
1694 op->od = od;
1695 ipam_add_port_addresses(op->od, op);
1696
1697 const char *redirect_chassis = smap_get(&op->nbrp->options,
1698 "redirect-chassis");
1699 if (redirect_chassis || op->nbrp->n_gateway_chassis) {
1700 /* Additional "derived" ovn_port crp represents the
1701 * instance of op on the "redirect-chassis". */
1702 const char *gw_chassis = smap_get(&op->od->nbr->options,
1703 "chassis");
1704 if (gw_chassis) {
1705 static struct vlog_rate_limit rl
1706 = VLOG_RATE_LIMIT_INIT(1, 1);
1707 VLOG_WARN_RL(&rl, "Bad configuration: "
1708 "redirect-chassis configured on port %s "
1709 "on L3 gateway router", nbrp->name);
1710 continue;
1711 }
1712 if (od->l3dgw_port || od->l3redirect_port) {
1713 static struct vlog_rate_limit rl
1714 = VLOG_RATE_LIMIT_INIT(1, 1);
1715 VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
1716 "with redirect-chassis on same logical "
1717 "router %s", od->nbr->name);
1718 continue;
1719 }
1720
1721 char *redirect_name = chassis_redirect_name(nbrp->name);
1722 struct ovn_port *crp = ovn_port_find(ports, redirect_name);
1723 if (crp) {
1724 crp->derived = true;
1725 crp->nbrp = nbrp;
1726 ovs_list_remove(&crp->list);
1727 ovs_list_push_back(both, &crp->list);
1728 } else {
1729 crp = ovn_port_create(ports, redirect_name,
1730 NULL, nbrp, NULL);
1731 crp->derived = true;
1732 ovs_list_push_back(nb_only, &crp->list);
1733 }
1734 crp->od = od;
1735 free(redirect_name);
1736
1737 /* Set l3dgw_port and l3redirect_port in od, for later
1738 * use during flow creation. */
1739 od->l3dgw_port = op;
1740 od->l3redirect_port = crp;
1741 }
1742 }
1743 }
1744 }
1745
1746 /* Connect logical router ports, and logical switch ports of type "router",
1747 * to their peers. */
1748 struct ovn_port *op;
1749 HMAP_FOR_EACH (op, key_node, ports) {
1750 if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
1751 const char *peer_name = smap_get(&op->nbsp->options, "router-port");
1752 if (!peer_name) {
1753 continue;
1754 }
1755
1756 struct ovn_port *peer = ovn_port_find(ports, peer_name);
1757 if (!peer || !peer->nbrp) {
1758 continue;
1759 }
1760
1761 peer->peer = op;
1762 op->peer = peer;
1763 op->od->router_ports = xrealloc(
1764 op->od->router_ports,
1765 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
1766 op->od->router_ports[op->od->n_router_ports++] = op;
1767
1768 /* Fill op->lsp_addrs for op->nbsp->addresses[] with
1769 * contents "router", which was skipped in the loop above. */
1770 for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
1771 if (!strcmp(op->nbsp->addresses[j], "router")) {
1772 if (extract_lrp_networks(peer->nbrp,
1773 &op->lsp_addrs[op->n_lsp_addrs])) {
1774 op->n_lsp_addrs++;
1775 }
1776 break;
1777 }
1778 }
1779 } else if (op->nbrp && op->nbrp->peer && !op->derived) {
1780 struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
1781 if (peer) {
1782 if (peer->nbrp) {
1783 op->peer = peer;
1784 } else if (peer->nbsp) {
1785 /* An ovn_port for a switch port of type "router" does have
1786 * a router port as its peer (see the case above for
1787 * "router" ports), but this is set via options:router-port
1788 * in Logical_Switch_Port and does not involve the
1789 * Logical_Router_Port's 'peer' column. */
1790 static struct vlog_rate_limit rl =
1791 VLOG_RATE_LIMIT_INIT(5, 1);
1792 VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
1793 "port %s is a switch port", op->key);
1794 }
1795 }
1796 }
1797 }
1798 }
1799
1800 static void
1801 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1802 uint16_t *port, int *addr_family);
1803
1804 static void
1805 get_router_load_balancer_ips(const struct ovn_datapath *od,
1806 struct sset *all_ips, int *addr_family)
1807 {
1808 if (!od->nbr) {
1809 return;
1810 }
1811
1812 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1813 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1814 struct smap *vips = &lb->vips;
1815 struct smap_node *node;
1816
1817 SMAP_FOR_EACH (node, vips) {
1818 /* node->key contains IP:port or just IP. */
1819 char *ip_address = NULL;
1820 uint16_t port;
1821
1822 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
1823 addr_family);
1824 if (!ip_address) {
1825 continue;
1826 }
1827
1828 if (!sset_contains(all_ips, ip_address)) {
1829 sset_add(all_ips, ip_address);
1830 }
1831
1832 free(ip_address);
1833 }
1834 }
1835 }
1836
1837 /* Returns an array of strings, each consisting of a MAC address followed
1838 * by one or more IP addresses, and if the port is a distributed gateway
1839 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1840 * LPORT_NAME is the name of the L3 redirect port or the name of the
1841 * logical_port specified in a NAT rule. These strings include the
1842 * external IP addresses of all NAT rules defined on that router, and all
1843 * of the IP addresses used in load balancer VIPs defined on that router.
1844 *
1845 * The caller must free each of the n returned strings with free(),
1846 * and must free the returned array when it is no longer needed. */
1847 static char **
1848 get_nat_addresses(const struct ovn_port *op, size_t *n)
1849 {
1850 size_t n_nats = 0;
1851 struct eth_addr mac;
1852 if (!op->nbrp || !op->od || !op->od->nbr
1853 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1854 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
1855 *n = n_nats;
1856 return NULL;
1857 }
1858
1859 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1860 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1861 bool central_ip_address = false;
1862
1863 char **addresses;
1864 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
1865
1866 /* Get NAT IP addresses. */
1867 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
1868 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1869 ovs_be32 ip, mask;
1870
1871 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1872 if (error || mask != OVS_BE32_MAX) {
1873 free(error);
1874 continue;
1875 }
1876
1877 /* Determine whether this NAT rule satisfies the conditions for
1878 * distributed NAT processing. */
1879 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1880 && nat->logical_port && nat->external_mac) {
1881 /* Distributed NAT rule. */
1882 if (eth_addr_from_string(nat->external_mac, &mac)) {
1883 struct ds address = DS_EMPTY_INITIALIZER;
1884 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1885 ds_put_format(&address, " %s", nat->external_ip);
1886 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1887 nat->logical_port);
1888 addresses[n_nats++] = ds_steal_cstr(&address);
1889 }
1890 } else {
1891 /* Centralized NAT rule, either on gateway router or distributed
1892 * router. */
1893 ds_put_format(&c_addresses, " %s", nat->external_ip);
1894 central_ip_address = true;
1895 }
1896 }
1897
1898 /* A set to hold all load-balancer vips. */
1899 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1900 int addr_family;
1901 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
1902
1903 const char *ip_address;
1904 SSET_FOR_EACH (ip_address, &all_ips) {
1905 ds_put_format(&c_addresses, " %s", ip_address);
1906 central_ip_address = true;
1907 }
1908 sset_destroy(&all_ips);
1909
1910 if (central_ip_address) {
1911 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1912 * ports should be restricted to the "redirect-chassis". */
1913 if (op->od->l3redirect_port) {
1914 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1915 op->od->l3redirect_port->json_key);
1916 }
1917
1918 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
1919 }
1920
1921 *n = n_nats;
1922
1923 return addresses;
1924 }
1925
1926 static bool
1927 gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc,
1928 const struct sbrec_chassis *nb_gwc_c,
1929 const struct sbrec_gateway_chassis *sb_gwc)
1930 {
1931 bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
1932 && nb_gwc->priority == sb_gwc->priority
1933 && smap_equal(&nb_gwc->options, &sb_gwc->options)
1934 && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
1935
1936 if (!equal) {
1937 return false;
1938 }
1939
1940 /* If everything else matched and we were unable to find the SBDB
1941 * Chassis entry at this time, assume a match and return true.
1942 * This happens when an ovn-controller is restarting and the Chassis
1943 * entry is gone away momentarily */
1944 return !nb_gwc_c
1945 || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
1946 sb_gwc->chassis->name));
1947 }
1948
1949 static bool
1950 sbpb_gw_chassis_needs_update(
1951 struct ovsdb_idl_index *sbrec_chassis_by_name,
1952 const struct sbrec_port_binding *port_binding,
1953 const struct nbrec_logical_router_port *lrp)
1954 {
1955 if (!lrp || !port_binding) {
1956 return false;
1957 }
1958
1959 /* These arrays are used to collect valid Gateway_Chassis and valid
1960 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1961 * we ignore the ones we can't match on the SBDB */
1962 struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis *
1963 sizeof *lrp_gwc);
1964 const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis *
1965 sizeof *lrp_gwc_c);
1966
1967 /* Count the number of gateway chassis chassis names from the logical
1968 * router port that we are able to match on the southbound database */
1969 int lrp_n_gateway_chassis = 0;
1970 int n;
1971 for (n = 0; n < lrp->n_gateway_chassis; n++) {
1972
1973 if (!lrp->gateway_chassis[n]->chassis_name) {
1974 continue;
1975 }
1976
1977 const struct sbrec_chassis *chassis =
1978 chassis_lookup_by_name(sbrec_chassis_by_name,
1979 lrp->gateway_chassis[n]->chassis_name);
1980
1981 lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
1982 lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
1983 lrp_n_gateway_chassis++;
1984 if (!chassis) {
1985 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1986 VLOG_WARN_RL(
1987 &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1988 "on logical router port %s does not exist in SBDB",
1989 lrp->gateway_chassis[n]->chassis_name, lrp->name);
1990 }
1991 }
1992
1993 /* Basic check, different amount of Gateway_Chassis means that we
1994 * need to update southbound database Port_Binding */
1995 if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) {
1996 free(lrp_gwc_c);
1997 free(lrp_gwc);
1998 return true;
1999 }
2000
2001 for (n = 0; n < lrp_n_gateway_chassis; n++) {
2002 int i;
2003 /* For each of the valid gw chassis on the lrp, check if there's
2004 * a match on the Port_Binding list, we assume order is not
2005 * persisted */
2006 for (i = 0; i < port_binding->n_gateway_chassis; i++) {
2007 if (gateway_chassis_equal(lrp_gwc[n],
2008 lrp_gwc_c[n],
2009 port_binding->gateway_chassis[i])) {
2010 break; /* we found a match */
2011 }
2012 }
2013
2014 /* if no Port_Binding gateway chassis matched for the entry... */
2015 if (i == port_binding->n_gateway_chassis) {
2016 free(lrp_gwc_c);
2017 free(lrp_gwc);
2018 return true; /* found no match for this gateway chassis on lrp */
2019 }
2020 }
2021
2022 /* no need for update, all ports matched */
2023 free(lrp_gwc_c);
2024 free(lrp_gwc);
2025 return false;
2026 }
2027
2028 /* This functions translates the gw chassis on the nb database
2029 * to sb database entries, the only difference is that SB database
2030 * Gateway_Chassis table references the chassis directly instead
2031 * of using the name */
2032 static void
2033 copy_gw_chassis_from_nbrp_to_sbpb(
2034 struct northd_context *ctx,
2035 struct ovsdb_idl_index *sbrec_chassis_by_name,
2036 const struct nbrec_logical_router_port *lrp,
2037 const struct sbrec_port_binding *port_binding) {
2038
2039 if (!lrp || !port_binding || !lrp->n_gateway_chassis) {
2040 return;
2041 }
2042
2043 struct sbrec_gateway_chassis **gw_chassis = NULL;
2044 int n_gwc = 0;
2045 int n;
2046
2047 /* XXX: This can be improved. This code will generate a set of new
2048 * Gateway_Chassis and push them all in a single transaction, instead
2049 * this would be more optimal if we just add/update/remove the rows in
2050 * the southbound db that need to change. We don't expect lots of
2051 * changes to the Gateway_Chassis table, but if that proves to be wrong
2052 * we should optimize this. */
2053 for (n = 0; n < lrp->n_gateway_chassis; n++) {
2054 struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n];
2055 if (!lrp_gwc->chassis_name) {
2056 continue;
2057 }
2058
2059 const struct sbrec_chassis *chassis =
2060 chassis_lookup_by_name(sbrec_chassis_by_name,
2061 lrp_gwc->chassis_name);
2062
2063 gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis);
2064
2065 struct sbrec_gateway_chassis *pb_gwc =
2066 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
2067
2068 sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name);
2069 sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority);
2070 sbrec_gateway_chassis_set_chassis(pb_gwc, chassis);
2071 sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options);
2072 sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids);
2073
2074 gw_chassis[n_gwc++] = pb_gwc;
2075 }
2076 sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc);
2077 free(gw_chassis);
2078 }
2079
2080 static void
2081 ovn_port_update_sbrec(struct northd_context *ctx,
2082 struct ovsdb_idl_index *sbrec_chassis_by_name,
2083 const struct ovn_port *op,
2084 struct hmap *chassis_qdisc_queues)
2085 {
2086 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
2087 if (op->nbrp) {
2088 /* If the router is for l3 gateway, it resides on a chassis
2089 * and its port type is "l3gateway". */
2090 const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");
2091 if (op->derived) {
2092 sbrec_port_binding_set_type(op->sb, "chassisredirect");
2093 } else if (chassis_name) {
2094 sbrec_port_binding_set_type(op->sb, "l3gateway");
2095 } else {
2096 sbrec_port_binding_set_type(op->sb, "patch");
2097 }
2098
2099 struct smap new;
2100 smap_init(&new);
2101 if (op->derived) {
2102 const char *redirect_chassis = smap_get(&op->nbrp->options,
2103 "redirect-chassis");
2104 if (op->nbrp->n_gateway_chassis && redirect_chassis) {
2105 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2106 VLOG_WARN_RL(
2107 &rl, "logical router port %s has both options:"
2108 "redirect-chassis and gateway_chassis populated "
2109 "redirect-chassis will be ignored in favour of "
2110 "gateway chassis", op->nbrp->name);
2111 }
2112
2113 if (op->nbrp->n_gateway_chassis) {
2114 if (sbpb_gw_chassis_needs_update(sbrec_chassis_by_name,
2115 op->sb, op->nbrp)) {
2116 copy_gw_chassis_from_nbrp_to_sbpb(ctx,
2117 sbrec_chassis_by_name,
2118 op->nbrp, op->sb);
2119 }
2120
2121 } else if (redirect_chassis) {
2122 /* Handle ports that had redirect-chassis option attached
2123 * to them, and for backwards compatibility convert them
2124 * to a single Gateway_Chassis entry */
2125 const struct sbrec_chassis *chassis =
2126 chassis_lookup_by_name(sbrec_chassis_by_name,
2127 redirect_chassis);
2128 if (chassis) {
2129 /* If we found the chassis, and the gw chassis on record
2130 * differs from what we expect go ahead and update */
2131 if (op->sb->n_gateway_chassis != 1
2132 || !op->sb->gateway_chassis[0]->chassis
2133 || strcmp(op->sb->gateway_chassis[0]->chassis->name,
2134 chassis->name)
2135 || op->sb->gateway_chassis[0]->priority != 0) {
2136 /* Construct a single Gateway_Chassis entry on the
2137 * Port_Binding attached to the redirect_chassis
2138 * name */
2139 struct sbrec_gateway_chassis *gw_chassis =
2140 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
2141
2142 char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
2143 chassis->name);
2144
2145 /* XXX: Again, here, we could just update an existing
2146 * Gateway_Chassis, instead of creating a new one
2147 * and replacing it */
2148 sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
2149 sbrec_gateway_chassis_set_priority(gw_chassis, 0);
2150 sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
2151 sbrec_gateway_chassis_set_external_ids(gw_chassis,
2152 &op->nbrp->external_ids);
2153 sbrec_port_binding_set_gateway_chassis(op->sb,
2154 &gw_chassis, 1);
2155 free(gwc_name);
2156 }
2157 } else {
2158 VLOG_WARN("chassis name '%s' from redirect from logical "
2159 " router port '%s' redirect-chassis not found",
2160 redirect_chassis, op->nbrp->name);
2161 if (op->sb->n_gateway_chassis) {
2162 sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
2163 0);
2164 }
2165 }
2166 }
2167 smap_add(&new, "distributed-port", op->nbrp->name);
2168 } else {
2169 if (op->peer) {
2170 smap_add(&new, "peer", op->peer->key);
2171 }
2172 if (chassis_name) {
2173 smap_add(&new, "l3gateway-chassis", chassis_name);
2174 }
2175 }
2176 sbrec_port_binding_set_options(op->sb, &new);
2177 smap_destroy(&new);
2178
2179 sbrec_port_binding_set_parent_port(op->sb, NULL);
2180 sbrec_port_binding_set_tag(op->sb, NULL, 0);
2181
2182 struct ds s = DS_EMPTY_INITIALIZER;
2183 ds_put_cstr(&s, op->nbrp->mac);
2184 for (int i = 0; i < op->nbrp->n_networks; ++i) {
2185 ds_put_format(&s, " %s", op->nbrp->networks[i]);
2186 }
2187 const char *addresses = ds_cstr(&s);
2188 sbrec_port_binding_set_mac(op->sb, &addresses, 1);
2189 ds_destroy(&s);
2190
2191 struct smap ids = SMAP_INITIALIZER(&ids);
2192 sbrec_port_binding_set_external_ids(op->sb, &ids);
2193
2194 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2195 } else {
2196 if (strcmp(op->nbsp->type, "router")) {
2197 uint32_t queue_id = smap_get_int(
2198 &op->sb->options, "qdisc_queue_id", 0);
2199 bool has_qos = port_has_qos_params(&op->nbsp->options);
2200 struct smap options;
2201
2202 if (op->sb->chassis && has_qos && !queue_id) {
2203 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
2204 op->sb->chassis);
2205 } else if (!has_qos && queue_id) {
2206 free_chassis_queueid(chassis_qdisc_queues,
2207 op->sb->chassis,
2208 queue_id);
2209 queue_id = 0;
2210 }
2211
2212 smap_clone(&options, &op->nbsp->options);
2213 if (queue_id) {
2214 smap_add_format(&options,
2215 "qdisc_queue_id", "%d", queue_id);
2216 }
2217 sbrec_port_binding_set_options(op->sb, &options);
2218 smap_destroy(&options);
2219 if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
2220 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
2221 } else {
2222 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2223 VLOG_WARN_RL(
2224 &rl, "Unknown port type '%s' set on logical switch '%s'.",
2225 op->nbsp->type, op->nbsp->name);
2226 }
2227
2228 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2229 } else {
2230 const char *chassis = NULL;
2231 if (op->peer && op->peer->od && op->peer->od->nbr) {
2232 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
2233 }
2234
2235 /* A switch port connected to a gateway router is also of
2236 * type "l3gateway". */
2237 if (chassis) {
2238 sbrec_port_binding_set_type(op->sb, "l3gateway");
2239 } else {
2240 sbrec_port_binding_set_type(op->sb, "patch");
2241 }
2242
2243 const char *router_port = smap_get(&op->nbsp->options,
2244 "router-port");
2245 if (router_port || chassis) {
2246 struct smap new;
2247 smap_init(&new);
2248 if (router_port) {
2249 smap_add(&new, "peer", router_port);
2250 }
2251 if (chassis) {
2252 smap_add(&new, "l3gateway-chassis", chassis);
2253 }
2254 sbrec_port_binding_set_options(op->sb, &new);
2255 smap_destroy(&new);
2256 } else {
2257 sbrec_port_binding_set_options(op->sb, NULL);
2258 }
2259
2260 const char *nat_addresses = smap_get(&op->nbsp->options,
2261 "nat-addresses");
2262 if (nat_addresses && !strcmp(nat_addresses, "router")) {
2263 if (op->peer && op->peer->od
2264 && (chassis || op->peer->od->l3redirect_port)) {
2265 size_t n_nats;
2266 char **nats = get_nat_addresses(op->peer, &n_nats);
2267 if (n_nats) {
2268 sbrec_port_binding_set_nat_addresses(op->sb,
2269 (const char **) nats, n_nats);
2270 for (size_t i = 0; i < n_nats; i++) {
2271 free(nats[i]);
2272 }
2273 free(nats);
2274 } else {
2275 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2276 }
2277 } else {
2278 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2279 }
2280 /* Only accept manual specification of ethernet address
2281 * followed by IPv4 addresses on type "l3gateway" ports. */
2282 } else if (nat_addresses && chassis) {
2283 struct lport_addresses laddrs;
2284 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
2285 static struct vlog_rate_limit rl =
2286 VLOG_RATE_LIMIT_INIT(1, 1);
2287 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
2288 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2289 } else {
2290 sbrec_port_binding_set_nat_addresses(op->sb,
2291 &nat_addresses, 1);
2292 destroy_lport_addresses(&laddrs);
2293 }
2294 } else {
2295 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2296 }
2297 }
2298 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
2299 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
2300 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
2301 op->nbsp->n_addresses);
2302
2303 struct smap ids = SMAP_INITIALIZER(&ids);
2304 smap_clone(&ids, &op->nbsp->external_ids);
2305 const char *name = smap_get(&ids, "neutron:port_name");
2306 if (name && name[0]) {
2307 smap_add(&ids, "name", name);
2308 }
2309 sbrec_port_binding_set_external_ids(op->sb, &ids);
2310 smap_destroy(&ids);
2311 }
2312 }
2313
2314 /* Remove mac_binding entries that refer to logical_ports which are
2315 * deleted. */
2316 static void
2317 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
2318 {
2319 const struct sbrec_mac_binding *b, *n;
2320 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
2321 if (!ovn_port_find(ports, b->logical_port)) {
2322 sbrec_mac_binding_delete(b);
2323 }
2324 }
2325 }
2326
2327 /* Updates the southbound Port_Binding table so that it contains the logical
2328 * switch ports specified by the northbound database.
2329 *
2330 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2331 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2332 * datapaths. */
2333 static void
2334 build_ports(struct northd_context *ctx,
2335 struct ovsdb_idl_index *sbrec_chassis_by_name,
2336 struct hmap *datapaths, struct hmap *ports)
2337 {
2338 struct ovs_list sb_only, nb_only, both;
2339 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
2340 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
2341
2342 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
2343 &tag_alloc_table, &sb_only, &nb_only, &both);
2344
2345 struct ovn_port *op, *next;
2346 /* For logical ports that are in both databases, update the southbound
2347 * record based on northbound data. Also index the in-use tunnel_keys.
2348 * For logical ports that are in NB database, do any tag allocation
2349 * needed. */
2350 LIST_FOR_EACH_SAFE (op, next, list, &both) {
2351 if (op->nbsp) {
2352 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
2353 }
2354 ovn_port_update_sbrec(ctx, sbrec_chassis_by_name,
2355 op, &chassis_qdisc_queues);
2356
2357 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
2358 if (op->sb->tunnel_key > op->od->port_key_hint) {
2359 op->od->port_key_hint = op->sb->tunnel_key;
2360 }
2361 }
2362
2363 /* Add southbound record for each unmatched northbound record. */
2364 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
2365 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
2366 if (!tunnel_key) {
2367 continue;
2368 }
2369
2370 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
2371 ovn_port_update_sbrec(ctx, sbrec_chassis_by_name, op,
2372 &chassis_qdisc_queues);
2373
2374 sbrec_port_binding_set_logical_port(op->sb, op->key);
2375 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
2376 }
2377
2378 bool remove_mac_bindings = false;
2379 if (!ovs_list_is_empty(&sb_only)) {
2380 remove_mac_bindings = true;
2381 }
2382
2383 /* Delete southbound records without northbound matches. */
2384 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
2385 ovs_list_remove(&op->list);
2386 sbrec_port_binding_delete(op->sb);
2387 ovn_port_destroy(ports, op);
2388 }
2389 if (remove_mac_bindings) {
2390 cleanup_mac_bindings(ctx, ports);
2391 }
2392
2393 tag_alloc_destroy(&tag_alloc_table);
2394 destroy_chassis_queues(&chassis_qdisc_queues);
2395 }
2396 \f
/* Bounds of the key range used for multicast groups (see 'key' in struct
 * multicast_group). */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A multicast group identity: a name paired with a key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = {
    .name = MC_FLOOD,
    .key = 65535,
};

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = {
    .name = MC_UNKNOWN,
    .key = 65534,
};

/* Returns true if 'a' and 'b' have the same key and the same name (compared
 * as strings), false otherwise. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return !strcmp(a->name, b->name);
}
2417
2418 /* Multicast group entry. */
2419 struct ovn_multicast {
2420 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
2421 struct ovn_datapath *datapath;
2422 const struct multicast_group *group;
2423
2424 struct ovn_port **ports;
2425 size_t n_ports, allocated_ports;
2426 };
2427
2428 static uint32_t
2429 ovn_multicast_hash(const struct ovn_datapath *datapath,
2430 const struct multicast_group *group)
2431 {
2432 return hash_pointer(datapath, group->key);
2433 }
2434
2435 static struct ovn_multicast *
2436 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
2437 const struct multicast_group *group)
2438 {
2439 struct ovn_multicast *mc;
2440
2441 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
2442 ovn_multicast_hash(datapath, group), mcgroups) {
2443 if (mc->datapath == datapath
2444 && multicast_group_equal(mc->group, group)) {
2445 return mc;
2446 }
2447 }
2448 return NULL;
2449 }
2450
2451 static void
2452 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
2453 struct ovn_port *port)
2454 {
2455 struct ovn_datapath *od = port->od;
2456 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
2457 if (!mc) {
2458 mc = xmalloc(sizeof *mc);
2459 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
2460 mc->datapath = od;
2461 mc->group = group;
2462 mc->n_ports = 0;
2463 mc->allocated_ports = 4;
2464 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
2465 }
2466 if (mc->n_ports >= mc->allocated_ports) {
2467 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
2468 sizeof *mc->ports);
2469 }
2470 mc->ports[mc->n_ports++] = port;
2471 }
2472
2473 static void
2474 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
2475 {
2476 if (mc) {
2477 hmap_remove(mcgroups, &mc->hmap_node);
2478 free(mc->ports);
2479 free(mc);
2480 }
2481 }
2482
2483 static void
2484 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
2485 const struct sbrec_multicast_group *sb)
2486 {
2487 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
2488 for (size_t i = 0; i < mc->n_ports; i++) {
2489 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2490 }
2491 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2492 free(ports);
2493 }
2494 \f
2495 /* Logical flow generation.
2496 *
2497 * This code generates the Logical_Flow table in the southbound database, as a
2498 * function of most of the northbound database.
2499 */
2500
/* One logical flow, as produced by the flow generation code below. */
struct ovn_lflow {
    struct hmap_node hmap_node; /* Hashed with ovn_lflow_hash(). */

    struct ovn_datapath *od;    /* Logical datapath the flow belongs to. */
    enum ovn_stage stage;       /* Pipeline stage the flow is added to. */
    uint16_t priority;          /* Flow priority. */
    char *match;                /* Match string; freed by
                                 * ovn_lflow_destroy(). */
    char *actions;              /* Actions string; freed by
                                 * ovn_lflow_destroy(). */
    char *stage_hint;           /* Optional hint, may be NULL; freed by
                                 * ovn_lflow_destroy(). */
    const char *where;          /* Source locator supplied by the caller;
                                 * not freed. */
};
2512
2513 static size_t
2514 ovn_lflow_hash(const struct ovn_lflow *lflow)
2515 {
2516 return ovn_logical_flow_hash(&lflow->od->sb->header_.uuid,
2517 ovn_stage_get_table(lflow->stage),
2518 ovn_stage_get_pipeline_name(lflow->stage),
2519 lflow->priority, lflow->match,
2520 lflow->actions);
2521 }
2522
2523 static bool
2524 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2525 {
2526 return (a->od == b->od
2527 && a->stage == b->stage
2528 && a->priority == b->priority
2529 && !strcmp(a->match, b->match)
2530 && !strcmp(a->actions, b->actions));
2531 }
2532
2533 static void
2534 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
2535 enum ovn_stage stage, uint16_t priority,
2536 char *match, char *actions, char *stage_hint,
2537 const char *where)
2538 {
2539 lflow->od = od;
2540 lflow->stage = stage;
2541 lflow->priority = priority;
2542 lflow->match = match;
2543 lflow->actions = actions;
2544 lflow->stage_hint = stage_hint;
2545 lflow->where = where;
2546 }
2547
2548 /* Adds a row with the specified contents to the Logical_Flow table. */
2549 static void
2550 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2551 enum ovn_stage stage, uint16_t priority,
2552 const char *match, const char *actions,
2553 const char *stage_hint, const char *where)
2554 {
2555 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2556
2557 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
2558 ovn_lflow_init(lflow, od, stage, priority,
2559 xstrdup(match), xstrdup(actions),
2560 nullable_xstrdup(stage_hint), where);
2561 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2562 }
2563
/* Adds a logical flow with the specified contents and STAGE_HINT, passing
 * OVS_SOURCE_LOCATOR as the 'where' argument of ovn_lflow_add_at(). */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

/* Same as ovn_lflow_add_with_hint(), with a null stage hint. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     NULL, OVS_SOURCE_LOCATOR)
2573
2574 static struct ovn_lflow *
2575 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
2576 enum ovn_stage stage, uint16_t priority,
2577 const char *match, const char *actions, uint32_t hash)
2578 {
2579 struct ovn_lflow target;
2580 ovn_lflow_init(&target, od, stage, priority,
2581 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2582 NULL, NULL);
2583
2584 struct ovn_lflow *lflow;
2585 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, hash, lflows) {
2586 if (ovn_lflow_equal(lflow, &target)) {
2587 return lflow;
2588 }
2589 }
2590 return NULL;
2591 }
2592
2593 static void
2594 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2595 {
2596 if (lflow) {
2597 hmap_remove(lflows, &lflow->hmap_node);
2598 free(lflow->match);
2599 free(lflow->actions);
2600 free(lflow->stage_hint);
2601 free(lflow);
2602 }
2603 }
2604
2605 /* Appends port security constraints on L2 address field 'eth_addr_field'
2606 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2607 * elements, is the collection of port_security constraints from an
2608 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2609 static void
2610 build_port_security_l2(const char *eth_addr_field,
2611 struct lport_addresses *ps_addrs,
2612 unsigned int n_ps_addrs,
2613 struct ds *match)
2614 {
2615 if (!n_ps_addrs) {
2616 return;
2617 }
2618
2619 ds_put_format(match, " && %s == {", eth_addr_field);
2620
2621 for (size_t i = 0; i < n_ps_addrs; i++) {
2622 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
2623 }
2624 ds_chomp(match, ' ');
2625 ds_put_cstr(match, "}");
2626 }
2627
2628 static void
2629 build_port_security_ipv6_nd_flow(
2630 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
2631 int n_ipv6_addrs)
2632 {
2633 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
2634 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
2635 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
2636 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
2637 ETH_ADDR_ARGS(ea));
2638 if (!n_ipv6_addrs) {
2639 ds_put_cstr(match, "))");
2640 return;
2641 }
2642
2643 char ip6_str[INET6_ADDRSTRLEN + 1];
2644 struct in6_addr lla;
2645 in6_generate_lla(ea, &lla);
2646 memset(ip6_str, 0, sizeof(ip6_str));
2647 ipv6_string_mapped(ip6_str, &lla);
2648 ds_put_format(match, " && (nd.target == %s", ip6_str);
2649
2650 for(int i = 0; i < n_ipv6_addrs; i++) {
2651 memset(ip6_str, 0, sizeof(ip6_str));
2652 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2653 ds_put_format(match, " || nd.target == %s", ip6_str);
2654 }
2655
2656 ds_put_format(match, ")))");
2657 }
2658
2659 static void
2660 build_port_security_ipv6_flow(
2661 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
2662 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
2663 {
2664 char ip6_str[INET6_ADDRSTRLEN + 1];
2665
2666 ds_put_format(match, " && %s == {",
2667 pipeline == P_IN ? "ip6.src" : "ip6.dst");
2668
2669 /* Allow link-local address. */
2670 struct in6_addr lla;
2671 in6_generate_lla(ea, &lla);
2672 ipv6_string_mapped(ip6_str, &lla);
2673 ds_put_format(match, "%s, ", ip6_str);
2674
2675 /* Allow ip6.dst=ff00::/8 for multicast packets */
2676 if (pipeline == P_OUT) {
2677 ds_put_cstr(match, "ff00::/8, ");
2678 }
2679 for(int i = 0; i < n_ipv6_addrs; i++) {
2680 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2681 ds_put_format(match, "%s, ", ip6_str);
2682 }
2683 /* Replace ", " by "}". */
2684 ds_chomp(match, ' ');
2685 ds_chomp(match, ',');
2686 ds_put_cstr(match, "}");
2687 }
2688
2689 /**
2690 * Build port security constraints on ARP and IPv6 ND fields
2691 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2692 *
2693 * For each port security of the logical port, following
2694 * logical flows are added
2695 * - If the port security has no IP (both IPv4 and IPv6) or
2696 * if it has IPv4 address(es)
2697 * - Priority 90 flow to allow ARP packets for known MAC addresses
2698 * in the eth.src and arp.spa fields. If the port security
2699 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2700 *
2701 * - If the port security has no IP (both IPv4 and IPv6) or
2702 * if it has IPv6 address(es)
2703 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2704 * in the eth.src and nd.sll/nd.tll fields. If the port security
2705 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2706 * for IPv6 Neighbor Advertisement packet.
2707 *
2708 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2709 */
2710 static void
2711 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
2712 {
2713 struct ds match = DS_EMPTY_INITIALIZER;
2714
2715 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2716 struct lport_addresses *ps = &op->ps_addrs[i];
2717
2718 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
2719
2720 ds_clear(&match);
2721 if (ps->n_ipv4_addrs || no_ip) {
2722 ds_put_format(&match,
2723 "inport == %s && eth.src == %s && arp.sha == %s",
2724 op->json_key, ps->ea_s, ps->ea_s);
2725
2726 if (ps->n_ipv4_addrs) {
2727 ds_put_cstr(&match, " && arp.spa == {");
2728 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
2729 /* When the netmask is applied, if the host portion is
2730 * non-zero, the host can only use the specified
2731 * address in the arp.spa. If zero, the host is allowed
2732 * to use any address in the subnet. */
2733 if (ps->ipv4_addrs[j].plen == 32
2734 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
2735 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
2736 } else {
2737 ds_put_format(&match, "%s/%d",
2738 ps->ipv4_addrs[j].network_s,
2739 ps->ipv4_addrs[j].plen);
2740 }
2741 ds_put_cstr(&match, ", ");
2742 }
2743 ds_chomp(&match, ' ');
2744 ds_chomp(&match, ',');
2745 ds_put_cstr(&match, "}");
2746 }
2747 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2748 ds_cstr(&match), "next;");
2749 }
2750
2751 if (ps->n_ipv6_addrs || no_ip) {
2752 ds_clear(&match);
2753 ds_put_format(&match, "inport == %s && eth.src == %s",
2754 op->json_key, ps->ea_s);
2755 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
2756 ps->n_ipv6_addrs);
2757 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2758 ds_cstr(&match), "next;");
2759 }
2760 }
2761
2762 ds_clear(&match);
2763 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
2764 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
2765 ds_cstr(&match), "drop;");
2766 ds_destroy(&match);
2767 }
2768
2769 /**
2770 * Build port security constraints on IPv4 and IPv6 src and dst fields
2771 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2772 *
2773 * For each port security of the logical port, following
2774 * logical flows are added
2775 * - If the port security has IPv4 addresses,
2776 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2777 *
2778 * - If the port security has IPv6 addresses,
2779 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2780 *
2781 * - If the port security has IPv4 addresses or IPv6 addresses or both
2782 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2783 */
2784 static void
2785 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
2786 struct hmap *lflows)
2787 {
2788 char *port_direction;
2789 enum ovn_stage stage;
2790 if (pipeline == P_IN) {
2791 port_direction = "inport";
2792 stage = S_SWITCH_IN_PORT_SEC_IP;
2793 } else {
2794 port_direction = "outport";
2795 stage = S_SWITCH_OUT_PORT_SEC_IP;
2796 }
2797
2798 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2799 struct lport_addresses *ps = &op->ps_addrs[i];
2800
2801 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
2802 continue;
2803 }
2804
2805 if (ps->n_ipv4_addrs) {
2806 struct ds match = DS_EMPTY_INITIALIZER;
2807 if (pipeline == P_IN) {
2808 /* Permit use of the unspecified address for DHCP discovery */
2809 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
2810 ds_put_format(&dhcp_match, "inport == %s"
2811 " && eth.src == %s"
2812 " && ip4.src == 0.0.0.0"
2813 " && ip4.dst == 255.255.255.255"
2814 " && udp.src == 68 && udp.dst == 67",
2815 op->json_key, ps->ea_s);
2816 ovn_lflow_add(lflows, op->od, stage, 90,
2817 ds_cstr(&dhcp_match), "next;");
2818 ds_destroy(&dhcp_match);
2819 ds_put_format(&match, "inport == %s && eth.src == %s"
2820 " && ip4.src == {", op->json_key,
2821 ps->ea_s);
2822 } else {
2823 ds_put_format(&match, "outport == %s && eth.dst == %s"
2824 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2825 op->json_key, ps->ea_s);
2826 }
2827
2828 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
2829 ovs_be32 mask = ps->ipv4_addrs[j].mask;
2830 /* When the netmask is applied, if the host portion is
2831 * non-zero, the host can only use the specified
2832 * address. If zero, the host is allowed to use any
2833 * address in the subnet.
2834 */
2835 if (ps->ipv4_addrs[j].plen == 32
2836 || ps->ipv4_addrs[j].addr & ~mask) {
2837 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
2838 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
2839 /* Host is also allowed to receive packets to the
2840 * broadcast address in the specified subnet. */
2841 ds_put_format(&match, ", %s",
2842 ps->ipv4_addrs[j].bcast_s);
2843 }
2844 } else {
2845 /* host portion is zero */
2846 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
2847 ps->ipv4_addrs[j].plen);
2848 }
2849 ds_put_cstr(&match, ", ");
2850 }
2851
2852 /* Replace ", " by "}". */
2853 ds_chomp(&match, ' ');
2854 ds_chomp(&match, ',');
2855 ds_put_cstr(&match, "}");
2856 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
2857 ds_destroy(&match);
2858 }
2859
2860 if (ps->n_ipv6_addrs) {
2861 struct ds match = DS_EMPTY_INITIALIZER;
2862 if (pipeline == P_IN) {
2863 /* Permit use of unspecified address for duplicate address
2864 * detection */
2865 struct ds dad_match = DS_EMPTY_INITIALIZER;
2866 ds_put_format(&dad_match, "inport == %s"
2867 " && eth.src == %s"
2868 " && ip6.src == ::"
2869 " && ip6.dst == ff02::/16"
2870 " && icmp6.type == {131, 135, 143}", op->json_key,
2871 ps->ea_s);
2872 ovn_lflow_add(lflows, op->od, stage, 90,
2873 ds_cstr(&dad_match), "next;");
2874 ds_destroy(&dad_match);
2875 }
2876 ds_put_format(&match, "%s == %s && %s == %s",
2877 port_direction, op->json_key,
2878 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
2879 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
2880 ps->ipv6_addrs, ps->n_ipv6_addrs);
2881 ovn_lflow_add(lflows, op->od, stage, 90,
2882 ds_cstr(&match), "next;");
2883 ds_destroy(&match);
2884 }
2885
2886 char *match = xasprintf("%s == %s && %s == %s && ip",
2887 port_direction, op->json_key,
2888 pipeline == P_IN ? "eth.src" : "eth.dst",
2889 ps->ea_s);
2890 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2891 free(match);
2892 }
2893
2894 }
2895
2896 static bool
2897 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2898 {
2899 return !lsp->enabled || *lsp->enabled;
2900 }
2901
2902 static bool
2903 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2904 {
2905 return !lsp->up || *lsp->up;
2906 }
2907
2908 static bool
2909 build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
2910 struct ds *options_action, struct ds *response_action,
2911 struct ds *ipv4_addr_match)
2912 {
2913 if (!op->nbsp->dhcpv4_options) {
2914 /* CMS has disabled native DHCPv4 for this lport. */
2915 return false;
2916 }
2917
2918 ovs_be32 host_ip, mask;
2919 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2920 &mask);
2921 if (error || ((offer_ip ^ host_ip) & mask)) {
2922 /* Either
2923 * - cidr defined is invalid or
2924 * - the offer ip of the logical port doesn't belong to the cidr
2925 * defined in the DHCPv4 options.
2926 * */
2927 free(error);
2928 return false;
2929 }
2930
2931 const char *server_ip = smap_get(
2932 &op->nbsp->dhcpv4_options->options, "server_id");
2933 const char *server_mac = smap_get(
2934 &op->nbsp->dhcpv4_options->options, "server_mac");
2935 const char *lease_time = smap_get(
2936 &op->nbsp->dhcpv4_options->options, "lease_time");
2937
2938 if (!(server_ip && server_mac && lease_time)) {
2939 /* "server_id", "server_mac" and "lease_time" should be
2940 * present in the dhcp_options. */
2941 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2942 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2943 op->json_key);
2944 return false;
2945 }
2946
2947 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2948 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2949
2950 /* server_mac is not DHCPv4 option, delete it from the smap. */
2951 smap_remove(&dhcpv4_options, "server_mac");
2952 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2953 smap_add(&dhcpv4_options, "netmask", netmask);
2954 free(netmask);
2955
2956 ds_put_format(options_action,
2957 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2958 IP_FMT", ", IP_ARGS(offer_ip));
2959
2960 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2961 * options on different architectures (big or little endian, SSE4.2) */
2962 const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
2963 for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
2964 const struct smap_node *node = sorted_opts[i];
2965 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2966 }
2967 free(sorted_opts);
2968
2969 ds_chomp(options_action, ' ');
2970 ds_chomp(options_action, ',');
2971 ds_put_cstr(options_action, "); next;");
2972
2973 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2974 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
2975 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2976 "output;",
2977 server_mac, IP_ARGS(offer_ip), server_ip);
2978
2979 ds_put_format(ipv4_addr_match,
2980 "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
2981 IP_ARGS(offer_ip), server_ip);
2982 smap_destroy(&dhcpv4_options);
2983 return true;
2984 }
2985
2986 static bool
2987 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2988 struct ds *options_action, struct ds *response_action)
2989 {
2990 if (!op->nbsp->dhcpv6_options) {
2991 /* CMS has disabled native DHCPv6 for this lport. */
2992 return false;
2993 }
2994
2995 struct in6_addr host_ip, mask;
2996
2997 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2998 &mask);
2999 if (error) {
3000 free(error);
3001 return false;
3002 }
3003 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
3004 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
3005 if (!ipv6_mask_is_any(&ip6_mask)) {
3006 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
3007 * options.*/
3008 return false;
3009 }
3010
3011 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
3012 /* "server_id" should be the MAC address. */
3013 const char *server_mac = smap_get(options_map, "server_id");
3014 struct eth_addr ea;
3015 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
3016 /* "server_id" should be present in the dhcpv6_options. */
3017 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3018 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
3019 " for lport %s", op->json_key);
3020 return false;
3021 }
3022
3023 /* Get the link local IP of the DHCPv6 server from the server MAC. */
3024 struct in6_addr lla;
3025 in6_generate_lla(ea, &lla);
3026
3027 char server_ip[INET6_ADDRSTRLEN + 1];
3028 ipv6_string_mapped(server_ip, &lla);
3029
3030 char ia_addr[INET6_ADDRSTRLEN + 1];
3031 ipv6_string_mapped(ia_addr, offer_ip);
3032
3033 ds_put_format(options_action,
3034 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
3035
3036 /* Check whether the dhcpv6 options should be configured as stateful.
3037 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
3038 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
3039 ipv6_string_mapped(ia_addr, offer_ip);
3040 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
3041 }
3042
3043 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
3044 * options on different architectures (big or little endian, SSE4.2) */
3045 const struct smap_node **sorted_opts = smap_sort(options_map);
3046 for (size_t i = 0; i < smap_count(options_map); i++) {
3047 const struct smap_node *node = sorted_opts[i];
3048 if (strcmp(node->key, "dhcpv6_stateless")) {
3049 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
3050 }
3051 }
3052 free(sorted_opts);
3053
3054 ds_chomp(options_action, ' ');
3055 ds_chomp(options_action, ',');
3056 ds_put_cstr(options_action, "); next;");
3057
3058 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
3059 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
3060 "udp.dst = 546; outport = inport; flags.loopback = 1; "
3061 "output;",
3062 server_mac, server_ip);
3063
3064 return true;
3065 }
3066
3067 struct ovn_port_group_ls {
3068 struct hmap_node key_node; /* Index on 'key'. */
3069 struct uuid key; /* nb_ls->header_.uuid. */
3070 const struct nbrec_logical_switch *nb_ls;
3071 };
3072
3073 struct ovn_port_group {
3074 struct hmap_node key_node; /* Index on 'key'. */
3075 struct uuid key; /* nb_pg->header_.uuid. */
3076 const struct nbrec_port_group *nb_pg;
3077 struct hmap nb_lswitches; /* NB lswitches related to the port group */
3078 };
3079
3080 static void
3081 ovn_port_group_ls_add(struct ovn_port_group *pg,
3082 const struct nbrec_logical_switch *nb_ls)
3083 {
3084 struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
3085 pg_ls->key = nb_ls->header_.uuid;
3086 pg_ls->nb_ls = nb_ls;
3087 hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
3088 }
3089
3090 static struct ovn_port_group_ls *
3091 ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
3092 {
3093 struct ovn_port_group_ls *pg_ls;
3094
3095 HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
3096 &pg->nb_lswitches) {
3097 if (uuid_equals(ls_uuid, &pg_ls->key)) {
3098 return pg_ls;
3099 }
3100 }
3101 return NULL;
3102 }
3103
3104 struct ovn_ls_port_group {
3105 struct hmap_node key_node; /* Index on 'key'. */
3106 struct uuid key; /* nb_pg->header_.uuid. */
3107 const struct nbrec_port_group *nb_pg;
3108 };
3109
3110 static void
3111 ovn_ls_port_group_add(struct hmap *nb_pgs,
3112 const struct nbrec_port_group *nb_pg)
3113 {
3114 struct ovn_ls_port_group *ls_pg = xzalloc(sizeof *ls_pg);
3115 ls_pg->key = nb_pg->header_.uuid;
3116 ls_pg->nb_pg = nb_pg;
3117 hmap_insert(nb_pgs, &ls_pg->key_node, uuid_hash(&ls_pg->key));
3118 }
3119
3120 static void
3121 ovn_ls_port_group_destroy(struct hmap *nb_pgs)
3122 {
3123 struct ovn_ls_port_group *ls_pg;
3124 HMAP_FOR_EACH_POP (ls_pg, key_node, nb_pgs) {
3125 free(ls_pg);
3126 }
3127 hmap_destroy(nb_pgs);
3128 }
3129
3130 static bool
3131 has_stateful_acl(struct ovn_datapath *od)
3132 {
3133 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3134 struct nbrec_acl *acl = od->nbs->acls[i];
3135 if (!strcmp(acl->action, "allow-related")) {
3136 return true;
3137 }
3138 }
3139
3140 struct ovn_ls_port_group *ls_pg;
3141 HMAP_FOR_EACH (ls_pg, key_node, &od->nb_pgs) {
3142 for (size_t i = 0; i < ls_pg->nb_pg->n_acls; i++) {
3143 struct nbrec_acl *acl = ls_pg->nb_pg->acls[i];
3144 if (!strcmp(acl->action, "allow-related")) {
3145 return true;
3146 }
3147 }
3148 }
3149
3150 return false;
3151 }
3152
3153 static void
3154 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
3155 {
3156 bool has_stateful = has_stateful_acl(od);
3157
3158 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
3159 * allowed by default. */
3160 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
3161 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
3162
3163 /* If there are any stateful ACL rules in this datapath, we must
3164 * send all IP packets through the conntrack action, which handles
3165 * defragmentation, in order to match L4 headers. */
3166 if (has_stateful) {
3167 for (size_t i = 0; i < od->n_router_ports; i++) {
3168 struct ovn_port *op = od->router_ports[i];
3169 /* Can't use ct() for router ports. Consider the
3170 * following configuration: lp1(10.0.0.2) on
3171 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
3172 * ping from lp1 to lp2, First, the response will go
3173 * through ct() with a zone for lp2 in the ls2 ingress
3174 * pipeline on hostB. That ct zone knows about this
3175 * connection. Next, it goes through ct() with the zone
3176 * for the router port in the egress pipeline of ls2 on
3177 * hostB. This zone does not know about the connection,
3178 * as the icmp request went through the logical router
3179 * on hostA, not hostB. This would only work with
3180 * distributed conntrack state across all chassis. */
3181 struct ds match_in = DS_EMPTY_INITIALIZER;
3182 struct ds match_out = DS_EMPTY_INITIALIZER;
3183
3184 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
3185 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
3186 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3187 ds_cstr(&match_in), "next;");
3188 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3189 ds_cstr(&match_out), "next;");
3190
3191 ds_destroy(&match_in);
3192 ds_destroy(&match_out);
3193 }
3194 if (od->localnet_port) {
3195 struct ds match_in = DS_EMPTY_INITIALIZER;
3196 struct ds match_out = DS_EMPTY_INITIALIZER;
3197
3198 ds_put_format(&match_in, "ip && inport == %s",
3199 od->localnet_port->json_key);
3200 ds_put_format(&match_out, "ip && outport == %s",
3201 od->localnet_port->json_key);
3202 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3203 ds_cstr(&match_in), "next;");
3204 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3205 ds_cstr(&match_out), "next;");
3206
3207 ds_destroy(&match_in);
3208 ds_destroy(&match_out);
3209 }
3210
3211 /* Ingress and Egress Pre-ACL Table (Priority 110).
3212 *
3213 * Not to do conntrack on ND and ICMP destination
3214 * unreachable packets. */
3215 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3216 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
3217 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
3218 "next;");
3219 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3220 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
3221 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
3222 "next;");
3223
3224 /* Ingress and Egress Pre-ACL Table (Priority 100).
3225 *
3226 * Regardless of whether the ACL is "from-lport" or "to-lport",
3227 * we need rules in both the ingress and egress table, because
3228 * the return traffic needs to be followed.
3229 *
3230 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3231 * it to conntrack for tracking and defragmentation. */
3232 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
3233 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3234 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
3235 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3236 }
3237 }
3238
3239 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
3240 * 'ip_address'. The caller must free() the memory allocated for
3241 * 'ip_address'. */
3242 static void
3243 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
3244 uint16_t *port, int *addr_family)
3245 {
3246 struct sockaddr_storage ss;
3247 if (!inet_parse_active(key, 0, &ss, false)) {
3248 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3249 VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
3250 key);
3251 return;
3252 }
3253
3254 struct ds s = DS_EMPTY_INITIALIZER;
3255 ss_format_address_nobracks(&ss, &s);
3256 *ip_address = ds_steal_cstr(&s);
3257
3258 *port = ss_get_port(&ss);
3259
3260 *addr_family = ss.ss_family;
3261 }
3262
3263 /*
3264 * Returns true if logical switch is configured with DNS records, false
3265 * otherwise.
3266 */
3267 static bool
3268 ls_has_dns_records(const struct nbrec_logical_switch *nbs)
3269 {
3270 for (size_t i = 0; i < nbs->n_dns_records; i++) {
3271 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
3272 return true;
3273 }
3274 }
3275
3276 return false;
3277 }
3278
3279 static void
3280 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
3281 {
3282 /* Do not send ND packets to conntrack */
3283 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
3284 "nd || nd_rs || nd_ra", "next;");
3285 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
3286 "nd || nd_rs || nd_ra", "next;");
3287
3288 /* Allow all packets to go to next tables by default. */
3289 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
3290 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
3291
3292 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3293 bool vip_configured = false;
3294 int addr_family = AF_INET;
3295 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3296 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3297 struct smap *vips = &lb->vips;
3298 struct smap_node *node;
3299
3300 SMAP_FOR_EACH (node, vips) {
3301 vip_configured = true;
3302
3303 /* node->key contains IP:port or just IP. */
3304 char *ip_address = NULL;
3305 uint16_t port;
3306 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3307 &addr_family);
3308 if (!ip_address) {
3309 continue;
3310 }
3311
3312 if (!sset_contains(&all_ips, ip_address)) {
3313 sset_add(&all_ips, ip_address);
3314 }
3315
3316 free(ip_address);
3317
3318 /* Ignore L4 port information in the key because fragmented packets
3319 * may not have L4 information. The pre-stateful table will send
3320 * the packet through ct() action to de-fragment. In stateful
3321 * table, we will eventually look at L4 information. */
3322 }
3323 }
3324
3325 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3326 * packet to conntrack for defragmentation. */
3327 const char *ip_address;
3328 SSET_FOR_EACH(ip_address, &all_ips) {
3329 char *match;
3330
3331 if (addr_family == AF_INET) {
3332 match = xasprintf("ip && ip4.dst == %s", ip_address);
3333 } else {
3334 match = xasprintf("ip && ip6.dst == %s", ip_address);
3335 }
3336 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
3337 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3338 free(match);
3339 }
3340
3341 sset_destroy(&all_ips);
3342
3343 if (vip_configured) {
3344 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
3345 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3346 }
3347 }
3348
3349 static void
3350 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
3351 {
3352 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3353 * allowed by default. */
3354 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
3355 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
3356
3357 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3358 * sent to conntrack for tracking and defragmentation. */
3359 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
3360 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3361 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
3362 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3363 }
3364
3365 static void
3366 build_acl_log(struct ds *actions, const struct nbrec_acl *acl)
3367 {
3368 if (!acl->log) {
3369 return;
3370 }
3371
3372 ds_put_cstr(actions, "log(");
3373
3374 if (acl->name) {
3375 ds_put_format(actions, "name=\"%s\", ", acl->name);
3376 }
3377
3378 /* If a severity level isn't specified, default to "info". */
3379 if (acl->severity) {
3380 ds_put_format(actions, "severity=%s, ", acl->severity);
3381 } else {
3382 ds_put_format(actions, "severity=info, ");
3383 }
3384
3385 if (!strcmp(acl->action, "drop")) {
3386 ds_put_cstr(actions, "verdict=drop, ");
3387 } else if (!strcmp(acl->action, "reject")) {
3388 ds_put_cstr(actions, "verdict=reject, ");
3389 } else if (!strcmp(acl->action, "allow")
3390 || !strcmp(acl->action, "allow-related")) {
3391 ds_put_cstr(actions, "verdict=allow, ");
3392 }
3393
3394 if (acl->meter) {
3395 ds_put_format(actions, "meter=\"%s\", ", acl->meter);
3396 }
3397
3398 ds_chomp(actions, ' ');
3399 ds_chomp(actions, ',');
3400 ds_put_cstr(actions, "); ");
3401 }
3402
3403 static void
3404 build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
3405 enum ovn_stage stage, struct nbrec_acl *acl,
3406 struct ds *extra_match, struct ds *extra_actions)
3407 {
3408 struct ds match = DS_EMPTY_INITIALIZER;
3409 struct ds actions = DS_EMPTY_INITIALIZER;
3410 bool ingress = (stage == S_SWITCH_IN_ACL);
3411
3412 /* TCP */
3413 build_acl_log(&actions, acl);
3414 if (extra_match->length > 0) {
3415 ds_put_format(&match, "(%s) && ", extra_match->string);
3416 }
3417 ds_put_format(&match, "ip4 && tcp && (%s)", acl->match);
3418 ds_put_format(&actions, "reg0 = 0; "
3419 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3420 "tcp_reset { outport <-> inport; %s };",
3421 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3422 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
3423 ds_cstr(&match), ds_cstr(&actions));
3424 ds_clear(&match);
3425 ds_clear(&actions);
3426 build_acl_log(&actions, acl);
3427 if (extra_match->length > 0) {
3428 ds_put_format(&match, "(%s) && ", extra_match->string);
3429 }
3430 ds_put_format(&match, "ip6 && tcp && (%s)", acl->match);
3431 ds_put_format(&actions, "reg0 = 0; "
3432 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3433 "tcp_reset { outport <-> inport; %s };",
3434 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3435 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
3436 ds_cstr(&match), ds_cstr(&actions));
3437
3438 /* IP traffic */
3439 ds_clear(&match);
3440 ds_clear(&actions);
3441 build_acl_log(&actions, acl);
3442 if (extra_match->length > 0) {
3443 ds_put_format(&match, "(%s) && ", extra_match->string);
3444 }
3445 ds_put_format(&match, "ip4 && (%s)", acl->match);
3446 if (extra_actions->length > 0) {
3447 ds_put_format(&actions, "%s ", extra_actions->string);
3448 }
3449 ds_put_format(&actions, "reg0 = 0; "
3450 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3451 "icmp4 { outport <-> inport; %s };",
3452 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3453 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
3454 ds_cstr(&match), ds_cstr(&actions));
3455 ds_clear(&match);
3456 ds_clear(&actions);
3457 build_acl_log(&actions, acl);
3458 if (extra_match->length > 0) {
3459 ds_put_format(&match, "(%s) && ", extra_match->string);
3460 }
3461 ds_put_format(&match, "ip6 && (%s)", acl->match);
3462 if (extra_actions->length > 0) {
3463 ds_put_format(&actions, "%s ", extra_actions->string);
3464 }
3465 ds_put_format(&actions, "reg0 = 0; icmp6 { "
3466 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3467 "outport <-> inport; %s };",
3468 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3469 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
3470 ds_cstr(&match), ds_cstr(&actions));
3471
3472 ds_destroy(&match);
3473 ds_destroy(&actions);
3474 }
3475
3476 static void
3477 consider_acl(struct hmap *lflows, struct ovn_datapath *od,
3478 struct nbrec_acl *acl, bool has_stateful)
3479 {
3480 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
3481 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
3482
3483 char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
3484 if (!strcmp(acl->action, "allow")
3485 || !strcmp(acl->action, "allow-related")) {
3486 /* If there are any stateful flows, we must even commit "allow"
3487 * actions. This is because, while the initiater's
3488 * direction may not have any stateful rules, the server's
3489 * may and then its return traffic would not have an
3490 * associated conntrack entry and would return "+invalid". */
3491 if (!has_stateful) {
3492 struct ds actions = DS_EMPTY_INITIALIZER;
3493 build_acl_log(&actions, acl);
3494 ds_put_cstr(&actions, "next;");
3495 ovn_lflow_add_with_hint(lflows, od, stage,
3496 acl->priority + OVN_ACL_PRI_OFFSET,
3497 acl->match, ds_cstr(&actions),
3498 stage_hint);
3499 ds_destroy(&actions);
3500 } else {
3501 struct ds match = DS_EMPTY_INITIALIZER;
3502 struct ds actions = DS_EMPTY_INITIALIZER;
3503
3504 /* Commit the connection tracking entry if it's a new
3505 * connection that matches this ACL. After this commit,
3506 * the reply traffic is allowed by a flow we create at
3507 * priority 65535, defined earlier.
3508 *
3509 * It's also possible that a known connection was marked for
3510 * deletion after a policy was deleted, but the policy was
3511 * re-added while that connection is still known. We catch
3512 * that case here and un-set ct_label.blocked (which will be done
3513 * by ct_commit in the "stateful" stage) to indicate that the
3514 * connection should be allowed to resume.
3515 */
3516 ds_put_format(&match, "((ct.new && !ct.est)"
3517 " || (!ct.new && ct.est && !ct.rpl "
3518 "&& ct_label.blocked == 1)) "
3519 "&& (%s)", acl->match);
3520 ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
3521 build_acl_log(&actions, acl);
3522 ds_put_cstr(&actions, "next;");
3523 ovn_lflow_add_with_hint(lflows, od, stage,
3524 acl->priority + OVN_ACL_PRI_OFFSET,
3525 ds_cstr(&match),
3526 ds_cstr(&actions),
3527 stage_hint);
3528
3529 /* Match on traffic in the request direction for an established
3530 * connection tracking entry that has not been marked for
3531 * deletion. There is no need to commit here, so we can just
3532 * proceed to the next table. We use this to ensure that this
3533 * connection is still allowed by the currently defined
3534 * policy. */
3535 ds_clear(&match);
3536 ds_clear(&actions);
3537 ds_put_format(&match,
3538 "!ct.new && ct.est && !ct.rpl"
3539 " && ct_label.blocked == 0 && (%s)",
3540 acl->match);
3541
3542 build_acl_log(&actions, acl);
3543 ds_put_cstr(&actions, "next;");
3544 ovn_lflow_add_with_hint(lflows, od, stage,
3545 acl->priority + OVN_ACL_PRI_OFFSET,
3546 ds_cstr(&match), ds_cstr(&actions),
3547 stage_hint);
3548
3549 ds_destroy(&match);
3550 ds_destroy(&actions);
3551 }
3552 } else if (!strcmp(acl->action, "drop")
3553 || !strcmp(acl->action, "reject")) {
3554 struct ds match = DS_EMPTY_INITIALIZER;
3555 struct ds actions = DS_EMPTY_INITIALIZER;
3556
3557 /* The implementation of "drop" differs if stateful ACLs are in
3558 * use for this datapath. In that case, the actions differ
3559 * depending on whether the connection was previously committed
3560 * to the connection tracker with ct_commit. */
3561 if (has_stateful) {
3562 /* If the packet is not part of an established connection, then
3563 * we can simply reject/drop it. */
3564 ds_put_cstr(&match,
3565 "(!ct.est || (ct.est && ct_label.blocked == 1))");
3566 if (!strcmp(acl->action, "reject")) {
3567 build_reject_acl_rules(od, lflows, stage, acl, &match,
3568 &actions);
3569 } else {
3570 ds_put_format(&match, " && (%s)", acl->match);
3571 build_acl_log(&actions, acl);
3572 ds_put_cstr(&actions, "/* drop */");
3573 ovn_lflow_add(lflows, od, stage,
3574 acl->priority + OVN_ACL_PRI_OFFSET,
3575 ds_cstr(&match), ds_cstr(&actions));
3576 }
3577 /* For an existing connection without ct_label set, we've
3578 * encountered a policy change. ACLs previously allowed
3579 * this connection and we committed the connection tracking
3580 * entry. Current policy says that we should drop this
3581 * connection. First, we set bit 0 of ct_label to indicate
3582 * that this connection is set for deletion. By not
3583 * specifying "next;", we implicitly drop the packet after
3584 * updating conntrack state. We would normally defer
3585 * ct_commit() to the "stateful" stage, but since we're
3586 * rejecting/dropping the packet, we go ahead and do it here.
3587 */
3588 ds_clear(&match);
3589 ds_clear(&actions);
3590 ds_put_cstr(&match, "ct.est && ct_label.blocked == 0");
3591 ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
3592 if (!strcmp(acl->action, "reject")) {
3593 build_reject_acl_rules(od, lflows, stage, acl, &match,
3594 &actions);
3595 } else {
3596 ds_put_format(&match, " && (%s)", acl->match);
3597 build_acl_log(&actions, acl);
3598 ds_put_cstr(&actions, "/* drop */");
3599 ovn_lflow_add(lflows, od, stage,
3600 acl->priority + OVN_ACL_PRI_OFFSET,
3601 ds_cstr(&match), ds_cstr(&actions));
3602 }
3603 } else {
3604 /* There are no stateful ACLs in use on this datapath,
3605 * so a "reject/drop" ACL is simply the "reject/drop"
3606 * logical flow action in all cases. */
3607 if (!strcmp(acl->action, "reject")) {
3608 build_reject_acl_rules(od, lflows, stage, acl, &match,
3609 &actions);
3610 } else {
3611 build_acl_log(&actions, acl);
3612 ds_put_cstr(&actions, "/* drop */");
3613 ovn_lflow_add(lflows, od, stage,
3614 acl->priority + OVN_ACL_PRI_OFFSET,
3615 acl->match, ds_cstr(&actions));
3616 }
3617 }
3618 ds_destroy(&match);
3619 ds_destroy(&actions);
3620 }
3621 free(stage_hint);
3622 }
3623
3624 static struct ovn_port_group *
3625 ovn_port_group_create(struct hmap *pgs,
3626 const struct nbrec_port_group *nb_pg)
3627 {
3628 struct ovn_port_group *pg = xzalloc(sizeof *pg);
3629 pg->key = nb_pg->header_.uuid;
3630 pg->nb_pg = nb_pg;
3631 hmap_init(&pg->nb_lswitches);
3632 hmap_insert(pgs, &pg->key_node, uuid_hash(&pg->key));
3633 return pg;
3634 }
3635
3636 static void
3637 ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
3638 {
3639 if (pg) {
3640 hmap_remove(pgs, &pg->key_node);
3641 struct ovn_port_group_ls *ls;
3642 HMAP_FOR_EACH_POP (ls, key_node, &pg->nb_lswitches) {
3643 free(ls);
3644 }
3645 hmap_destroy(&pg->nb_lswitches);
3646 free(pg);
3647 }
3648 }
3649
3650 static void
3651 build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
3652 struct hmap *ports)
3653 {
3654 hmap_init(pgs);
3655
3656 const struct nbrec_port_group *nb_pg;
3657 NBREC_PORT_GROUP_FOR_EACH (nb_pg, ctx->ovnnb_idl) {
3658 struct ovn_port_group *pg = ovn_port_group_create(pgs, nb_pg);
3659 for (size_t i = 0; i < nb_pg->n_ports; i++) {
3660 struct ovn_port *op = ovn_port_find(ports, nb_pg->ports[i]->name);
3661 if (!op) {
3662 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3663 VLOG_ERR_RL(&rl, "lport %s in port group %s not found.",
3664 nb_pg->ports[i]->name,
3665 nb_pg->name);
3666 continue;
3667 }
3668
3669 if (!op->od->nbs) {
3670 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3671 VLOG_WARN_RL(&rl, "lport %s in port group %s has no lswitch.",
3672 nb_pg->ports[i]->name,
3673 nb_pg->name);
3674 continue;
3675 }
3676
3677 struct ovn_port_group_ls *pg_ls =
3678 ovn_port_group_ls_find(pg, &op->od->nbs->header_.uuid);
3679 if (!pg_ls) {
3680 ovn_port_group_ls_add(pg, op->od->nbs);
3681 ovn_ls_port_group_add(&op->od->nb_pgs, nb_pg);
3682 }
3683 }
3684 }
3685 }
3686
3687 static void
3688 build_acls(struct ovn_datapath *od, struct hmap *lflows,
3689 struct hmap *port_groups)
3690 {
3691 bool has_stateful = has_stateful_acl(od);
3692
3693 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
3694 * default. A related rule at priority 1 is added below if there
3695 * are any stateful ACLs in this datapath. */
3696 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
3697 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
3698
3699 if (has_stateful) {
3700 /* Ingress and Egress ACL Table (Priority 1).
3701 *
3702 * By default, traffic is allowed. This is partially handled by
3703 * the Priority 0 ACL flows added earlier, but we also need to
3704 * commit IP flows. This is because, while the initiater's
3705 * direction may not have any stateful rules, the server's may
3706 * and then its return traffic would not have an associated
3707 * conntrack entry and would return "+invalid".
3708 *
3709 * We use "ct_commit" for a connection that is not already known
3710 * by the connection tracker. Once a connection is committed,
3711 * subsequent packets will hit the flow at priority 0 that just
3712 * uses "next;"
3713 *
3714 * We also check for established connections that have ct_label.blocked
3715 * set on them. That's a connection that was disallowed, but is
3716 * now allowed by policy again since it hit this default-allow flow.
3717 * We need to set ct_label.blocked=0 to let the connection continue,
3718 * which will be done by ct_commit() in the "stateful" stage.
3719 * Subsequent packets will hit the flow at priority 0 that just
3720 * uses "next;". */
3721 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
3722 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3723 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3724 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
3725 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3726 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3727
3728 /* Ingress and Egress ACL Table (Priority 65535).
3729 *
3730 * Always drop traffic that's in an invalid state. Also drop
3731 * reply direction packets for connections that have been marked
3732 * for deletion (bit 0 of ct_label is set).
3733 *
3734 * This is enforced at a higher priority than ACLs can be defined. */
3735 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3736 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3737 "drop;");
3738 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3739 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3740 "drop;");
3741
3742 /* Ingress and Egress ACL Table (Priority 65535).
3743 *
3744 * Allow reply traffic that is part of an established
3745 * conntrack entry that has not been marked for deletion
3746 * (bit 0 of ct_label). We only match traffic in the
3747 * reply direction because we want traffic in the request
3748 * direction to hit the currently defined policy from ACLs.
3749 *
3750 * This is enforced at a higher priority than ACLs can be defined. */
3751 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3752 "ct.est && !ct.rel && !ct.new && !ct.inv "
3753 "&& ct.rpl && ct_label.blocked == 0",
3754 "next;");
3755 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3756 "ct.est && !ct.rel && !ct.new && !ct.inv "
3757 "&& ct.rpl && ct_label.blocked == 0",
3758 "next;");
3759
3760 /* Ingress and Egress ACL Table (Priority 65535).
3761 *
3762 * Allow traffic that is related to an existing conntrack entry that
3763 * has not been marked for deletion (bit 0 of ct_label).
3764 *
3765 * This is enforced at a higher priority than ACLs can be defined.
3766 *
3767 * NOTE: This does not support related data sessions (eg,
3768 * a dynamically negotiated FTP data channel), but will allow
3769 * related traffic such as an ICMP Port Unreachable through
3770 * that's generated from a non-listening UDP port. */
3771 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3772 "!ct.est && ct.rel && !ct.new && !ct.inv "
3773 "&& ct_label.blocked == 0",
3774 "next;");
3775 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3776 "!ct.est && ct.rel && !ct.new && !ct.inv "
3777 "&& ct_label.blocked == 0",
3778 "next;");
3779
3780 /* Ingress and Egress ACL Table (Priority 65535).
3781 *
3782 * Not to do conntrack on ND packets. */
3783 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
3784 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
3785 }
3786
3787 /* Ingress or Egress ACL Table (Various priorities). */
3788 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3789 struct nbrec_acl *acl = od->nbs->acls[i];
3790 consider_acl(lflows, od, acl, has_stateful);
3791 }
3792 struct ovn_port_group *pg;
3793 HMAP_FOR_EACH (pg, key_node, port_groups) {
3794 if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
3795 for (size_t i = 0; i < pg->nb_pg->n_acls; i++) {
3796 consider_acl(lflows, od, pg->nb_pg->acls[i], has_stateful);
3797 }
3798 }
3799 }
3800
3801 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
3802 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3803 * */
3804 for (size_t i = 0; i < od->nbs->n_ports; i++) {
3805 if (od->nbs->ports[i]->dhcpv4_options) {
3806 const char *server_id = smap_get(
3807 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
3808 const char *server_mac = smap_get(
3809 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
3810 const char *lease_time = smap_get(
3811 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
3812 if (server_id && server_mac && lease_time) {
3813 struct ds match = DS_EMPTY_INITIALIZER;
3814 const char *actions =
3815 has_stateful ? "ct_commit; next;" : "next;";
3816 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3817 "&& ip4.src == %s && udp && udp.src == 67 "
3818 "&& udp.dst == 68", od->nbs->ports[i]->name,
3819 server_mac, server_id);
3820 ovn_lflow_add(
3821 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3822 actions);
3823 ds_destroy(&match);
3824 }
3825 }
3826
3827 if (od->nbs->ports[i]->dhcpv6_options) {
3828 const char *server_mac = smap_get(
3829 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
3830 struct eth_addr ea;
3831 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
3832 /* Get the link local IP of the DHCPv6 server from the
3833 * server MAC. */
3834 struct in6_addr lla;
3835 in6_generate_lla(ea, &lla);
3836
3837 char server_ip[INET6_ADDRSTRLEN + 1];
3838 ipv6_string_mapped(server_ip, &lla);
3839
3840 struct ds match = DS_EMPTY_INITIALIZER;
3841 const char *actions = has_stateful ? "ct_commit; next;" :
3842 "next;";
3843 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3844 "&& ip6.src == %s && udp && udp.src == 547 "
3845 "&& udp.dst == 546", od->nbs->ports[i]->name,
3846 server_mac, server_ip);
3847 ovn_lflow_add(
3848 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3849 actions);
3850 ds_destroy(&match);
3851 }
3852 }
3853 }
3854
3855 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3856 * if the CMS has configured DNS records for the datapath.
3857 */
3858 if (ls_has_dns_records(od->nbs)) {
3859 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
3860 ovn_lflow_add(
3861 lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
3862 actions);
3863 }
3864 }
3865
3866 static void
3867 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3868 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3869 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3870 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_METER, 0, "1", "next;");
3871 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_METER, 0, "1", "next;");
3872
3873 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3874 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3875 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3876 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3877 int64_t rate = 0;
3878 int64_t burst = 0;
3879
3880 for (size_t j = 0; j < qos->n_action; j++) {
3881 if (!strcmp(qos->key_action[j], "dscp")) {
3882 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3883
3884 ds_put_format(&dscp_action, "ip.dscp = %"PRId64"; next;",
3885 qos->value_action[j]);
3886 ovn_lflow_add(lflows, od, stage,
3887 qos->priority,
3888 qos->match, ds_cstr(&dscp_action));
3889 ds_destroy(&dscp_action);
3890 }
3891 }
3892
3893 for (size_t n = 0; n < qos->n_bandwidth; n++) {
3894 if (!strcmp(qos->key_bandwidth[n], "rate")) {
3895 rate = qos->value_bandwidth[n];
3896 } else if (!strcmp(qos->key_bandwidth[n], "burst")) {
3897 burst = qos->value_bandwidth[n];
3898 }
3899 }
3900 if (rate) {
3901 struct ds meter_action = DS_EMPTY_INITIALIZER;
3902 stage = ingress ? S_SWITCH_IN_QOS_METER : S_SWITCH_OUT_QOS_METER;
3903 if (burst) {
3904 ds_put_format(&meter_action,
3905 "set_meter(%"PRId64", %"PRId64"); next;",
3906 rate, burst);
3907 } else {
3908 ds_put_format(&meter_action,
3909 "set_meter(%"PRId64"); next;",
3910 rate);
3911 }
3912
3913 /* Ingress and Egress QoS Meter Table.
3914 *
3915 * We limit the bandwidth of this flow by adding a meter table.
3916 */
3917 ovn_lflow_add(lflows, od, stage,
3918 qos->priority,
3919 qos->match, ds_cstr(&meter_action));
3920 ds_destroy(&meter_action);
3921 }
3922 }
3923 }
3924
3925 static void
3926 build_lb(struct ovn_datapath *od, struct hmap *lflows)
3927 {
3928 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3929 * default. */
3930 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3931 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3932
3933 if (od->nbs->load_balancer) {
3934 /* Ingress and Egress LB Table (Priority 65535).
3935 *
3936 * Send established traffic through conntrack for just NAT. */
3937 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3938 "ct.est && !ct.rel && !ct.new && !ct.inv",
3939 REGBIT_CONNTRACK_NAT" = 1; next;");
3940 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3941 "ct.est && !ct.rel && !ct.new && !ct.inv",
3942 REGBIT_CONNTRACK_NAT" = 1; next;");
3943 }
3944 }
3945
3946 static void
3947 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3948 {
3949 /* Ingress and Egress stateful Table (Priority 0): Packets are
3950 * allowed by default. */
3951 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3952 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3953
3954 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3955 * committed to conntrack. We always set ct_label.blocked to 0 here as
3956 * any packet that makes it this far is part of a connection we
3957 * want to allow to continue. */
3958 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3959 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3960 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3961 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3962
3963 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3964 * through nat (without committing).
3965 *
3966 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3967 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3968 * don't overlap.
3969 */
3970 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3971 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3972 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3973 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3974
3975 /* Load balancing rules for new connections get committed to conntrack
3976 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3977 * a higher priority rule for load balancing below also commits the
3978 * connection, so it is okay if we do not hit the above match on
3979 * REGBIT_CONNTRACK_COMMIT. */
3980 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3981 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3982 struct smap *vips = &lb->vips;
3983 struct smap_node *node;
3984
3985 SMAP_FOR_EACH (node, vips) {
3986 uint16_t port = 0;
3987 int addr_family;
3988
3989 /* node->key contains IP:port or just IP. */
3990 char *ip_address = NULL;
3991 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3992 &addr_family);
3993 if (!ip_address) {
3994 continue;
3995 }
3996
3997 /* New connections in Ingress table. */
3998 char *action = xasprintf("ct_lb(%s);", node->value);
3999 struct ds match = DS_EMPTY_INITIALIZER;
4000 if (addr_family == AF_INET) {
4001 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
4002 } else {
4003 ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
4004 }
4005 if (port) {
4006 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
4007 ds_put_format(&match, " && udp.dst == %d", port);
4008 } else {
4009 ds_put_format(&match, " && tcp.dst == %d", port);
4010 }
4011 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
4012 120, ds_cstr(&match), action);
4013 } else {
4014 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
4015 110, ds_cstr(&match), action);
4016 }
4017
4018 free(ip_address);
4019 ds_destroy(&match);
4020 free(action);
4021 }
4022 }
4023 }
4024
/* Adds to 'lflows' the logical flows of the logical switch pipelines.
 *
 * Only datapaths in 'datapaths' that have an 'nbs' and ports in 'ports'
 * that have an 'nbsp' are considered; everything else is skipped.
 * 'port_groups' is handed to build_acls().  Enabled switch ports are also
 * registered in the flood multicast group, and enabled ports with an
 * "unknown" address in the unknown multicast group, both kept in
 * 'mcgroups'.
 *
 * The function works in passes, each pass filling one table (or a small
 * group of tables) for all datapaths or all ports.  The 'match' and
 * 'actions' buffers are reused across passes and released at the end. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *port_groups, struct hmap *lflows,
                    struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 through 10.  Egress tables 0 through 7. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        build_pre_acls(od, lflows);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows, port_groups);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     *   (priority 50).
     * Ingress table 1: Ingress port security - IP (priority 90 and 80)
     * Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!lsp_is_enabled(op->nbsp)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        ds_clear(&match);
        ds_clear(&actions);
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        /* When the southbound port binding carries a "qdisc_queue_id"
         * option, select that queue before moving on.
         * NOTE(review): this dereferences op->sb without a check --
         * presumably every switch port has a binding by now; confirm. */
        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table 11: ARP/ND responder, skip requests coming from localnet
     * and vtep ports. (priority 100); see ovn-northd.8.xml for the
     * rationale. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if ((!strcmp(op->nbsp->type, "localnet")) ||
            (!strcmp(op->nbsp->type, "vtep"))) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    /* Ingress table 11: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router or
         *  - port type is localport
         */
        if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
            strcmp(op->nbsp->type, "localport")) {
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to an ARP request from the port that owns the
                 * address (otherwise a DHCP client that ARPs to check for a
                 * duplicate address will fail).  Instead, forward it the usual
                 * way.
                 *
                 * (Another alternative would be to simply drop the packet.  If
                 * everything is working as it is configured, then this would
                 * produce equivalent results, since no one should reply to the
                 * request.  But ARPing for one's own IP address is intended to
                 * detect situations where the network is not working as
                 * configured, so dropping the request would frustrate that
                 * intent.)
                 *
                 * 'match' still holds the ARP match built above; it is
                 * extended in place rather than rebuilt. */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }

            /* For ND solicitations, we need to listen for both the
             * unicast IPv6 address and the multicast address stored in
             * 'sn_addr_s' (presumably the solicited-node multicast address;
             * the comment here used to say "all-nodes" -- TODO confirm),
             * but always respond with the unicast IPv6 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match,
                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);

                /* Ports of type "router" answer with "nd_na_router", all
                 * other ports with "nd_na". */
                ds_clear(&actions);
                ds_put_format(&actions,
                        "%s { "
                        "eth.src = %s; "
                        "ip6.src = %s; "
                        "nd.target = %s; "
                        "nd.tll = %s; "
                        "outport = inport; "
                        "flags.loopback = 1; "
                        "output; "
                        "};",
                        !strcmp(op->nbsp->type, "router") ?
                            "nd_na_router" : "nd_na",
                        op->lsp_addrs[i].ea_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ea_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to a solicitation from the port that owns the
                 * address (otherwise DAD detection will fail). */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    /* Ingress table 11: ARP/ND responder, by default goto next.
     * (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Logical switch ingress table 12 and 13: DHCP options and response
     * priority 100 flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
           continue;
        }

        if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
            /* Don't add the DHCP flows if the port is not enabled or if the
             * port is a router port. */
            continue;
        }

        if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
            /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
             */
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Only the first IPv4 address for which build_dhcpv4_action()
             * succeeds produces flows (note the 'break' below). */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
                /* NOTE(review): the three buffers are only destroyed on the
                 * success path; this relies on build_dhcpv4_action() leaving
                 * them empty when it returns false -- confirm. */
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action, &ipv4_addr_match)) {
                    ds_clear(&match);
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);
                    /* Allow ip4.src = OFFER_IP and
                     * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
                     * cases
                     *  -  When the client wants to renew the IP by sending
                     *     the DHCPREQUEST to the server ip.
                     *  -  When the client wants to renew the IP by
                     *     broadcasting the DHCPREQUEST.
                     */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "%s && udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);

                    /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
                     * put_dhcp_opts action is successful. */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4 && udp.src == 68 && udp.dst == 67"
                        " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
                        op->lsp_addrs[i].ea_s);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    ds_destroy(&ipv4_addr_match);
                    break;
                }
            }

            /* Same scheme for DHCPv6: flows are generated for the first
             * IPv6 address that build_dhcpv6_action() accepts. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv6_action(
                        op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
                        &options_action, &response_action)) {
                    ds_clear(&match);
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s"
                        " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
                        " udp.dst == 547", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
                                  ds_cstr(&match), ds_cstr(&options_action));

                    /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
                     * put_dhcpv6_opts action is successful.  The options
                     * match is extended in place for the response flow. */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
                                  ds_cstr(&match), ds_cstr(&response_action));
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }
        }
    }

    /* Logical switch ingress table 14 and 15: DNS lookup and response
     * priority 100 flows.
     */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs || !ls_has_dns_records(od->nbs)) {
           continue;
        }

        struct ds action = DS_EMPTY_INITIALIZER;

        ds_clear(&match);
        ds_put_cstr(&match, "udp.dst == 53");
        ds_put_format(&action,
                      REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_clear(&action);
        /* Two response flows share the match text below and differ only in
         * their actions: one swaps the IPv4 addresses, the other the IPv6
         * addresses. */
        ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
        ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
                      "udp.dst = udp.src; udp.src = 53; outport = inport; "
                      "flags.loopback = 1; output;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_clear(&action);
        ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
                      "udp.dst = udp.src; udp.src = 53; outport = inport; "
                      "flags.loopback = 1; output;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_destroy(&action);
    }

    /* Ingress table 12 and 13: DHCP options and response, by default goto
     * next. (priority 0).
     * Ingress table 14 and 15: DNS lookup and response, by default goto next.
     * (priority 0). */

    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
    }

    /* Ingress table 16: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /* Every enabled switch port becomes a member of the flood group. */
        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 16: Destination lookup, unicast handling (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            /* Addresses are owned by the logical port.
             * Ethernet address followed by zero or more IPv4
             * or IPv6 addresses (or both). */
            struct eth_addr mac;
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                /* Remember that this datapath has an "unknown" port so the
                 * priority-0 flow further down gets installed. */
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                /* For a dynamic address the MAC is taken from the
                 * 'dynamic_addresses' column; skip the entry when that
                 * column is unset or does not start with a MAC. */
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "router")) {
                /* "router" means: use the MAC of the peer logical router
                 * port.  Skip the entry when there is no such peer or its
                 * MAC cannot be parsed. */
                if (!op->peer || !op->peer->nbrp
                    || !ovs_scan(op->peer->nbrp->mac,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port
                    && op->peer->od->l3redirect_port) {
                    /* The destination lookup flow for the router's
                     * distributed gateway port MAC address should only be
                     * programmed on the "redirect-chassis". */
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  op->peer->od->l3redirect_port->json_key);
                }

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Add ethernet addresses specified in NAT rules on
                 * distributed logical routers. */
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port) {
                    for (int j = 0; j < op->peer->od->nbr->n_nat; j++) {
                        const struct nbrec_nat *nat
                                                  = op->peer->od->nbr->nat[j];
                        /* Only "dnat_and_snat" rules that name both a
                         * logical port and a parsable external MAC
                         * qualify. */
                        if (!strcmp(nat->type, "dnat_and_snat")
                            && nat->logical_port && nat->external_mac
                            && eth_addr_from_string(nat->external_mac, &mac)) {

                            ds_clear(&match);
                            ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
                                          " && is_chassis_resident(\"%s\")",
                                          ETH_ADDR_ARGS(mac),
                                          nat->logical_port);

                            ds_clear(&actions);
                            ds_put_format(&actions, "outport = %s; output;",
                                          op->json_key);
                            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
                                          50, ds_cstr(&match),
                                          ds_cstr(&actions));
                        }
                    }
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    /* Ingress table 16: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress table 8: Egress port security - IP (priority 0)
     * Egress table 9: Egress port security L2 - multicast/broadcast
     *                 (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 8: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 9: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }

    /* Release the scratch buffers shared by all passes above. */
    ds_destroy(&match);
    ds_destroy(&actions);
}
4573
4574 static bool
4575 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
4576 {
4577 return !lrport->enabled || *lrport->enabled;
4578 }
4579
4580 /* Returns a string of the IP address of the router port 'op' that
4581 * overlaps with 'ip_s". If one is not found, returns NULL.
4582 *
4583 * The caller must not free the returned string. */
4584 static const char *
4585 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
4586 {
4587 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4588
4589 if (is_ipv4) {
4590 ovs_be32 ip;
4591
4592 if (!ip_parse(ip_s, &ip)) {
4593 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4594 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
4595 return NULL;
4596 }
4597
4598 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4599 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
4600
4601 if (!((na->network ^ ip) & na->mask)) {
4602 /* There should be only 1 interface that matches the
4603 * supplied IP. Otherwise, it's a configuration error,
4604 * because subnets of a router's interfaces should NOT
4605 * overlap. */
4606 return na->addr_s;
4607 }
4608 }
4609 } else {
4610 struct in6_addr ip6;
4611
4612 if (!ipv6_parse(ip_s, &ip6)) {
4613 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4614 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
4615 return NULL;
4616 }
4617
4618 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4619 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
4620 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
4621 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
4622
4623 if (ipv6_is_zero(&and_addr)) {
4624 /* There should be only 1 interface that matches the
4625 * supplied IP. Otherwise, it's a configuration error,
4626 * because subnets of a router's interfaces should NOT
4627 * overlap. */
4628 return na->addr_s;
4629 }
4630 }
4631 }
4632
4633 return NULL;
4634 }
4635
4636 static void
4637 add_route(struct hmap *lflows, const struct ovn_port *op,
4638 const char *lrp_addr_s, const char *network_s, int plen,
4639 const char *gateway, const char *policy)
4640 {
4641 bool is_ipv4 = strchr(network_s, '.') ? true : false;
4642 struct ds match = DS_EMPTY_INITIALIZER;
4643 const char *dir;
4644 uint16_t priority;
4645
4646 if (policy && !strcmp(policy, "src-ip")) {
4647 dir = "src";
4648 priority = plen * 2;
4649 } else {
4650 dir = "dst";
4651 priority = (plen * 2) + 1;
4652 }
4653
4654 /* IPv6 link-local addresses must be scoped to the local router port. */
4655 if (!is_ipv4) {
4656 struct in6_addr network;
4657 ovs_assert(ipv6_parse(network_s, &network));
4658 if (in6_is_lla(&network)) {
4659 ds_put_format(&match, "inport == %s && ", op->json_key);
4660 }
4661 }
4662 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
4663 network_s, plen);
4664
4665 struct ds actions = DS_EMPTY_INITIALIZER;
4666 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
4667
4668 if (gateway) {
4669 ds_put_cstr(&actions, gateway);
4670 } else {
4671 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
4672 }
4673 ds_put_format(&actions, "; "
4674 "%sreg1 = %s; "
4675 "eth.src = %s; "
4676 "outport = %s; "
4677 "flags.loopback = 1; "
4678 "next;",
4679 is_ipv4 ? "" : "xx",
4680 lrp_addr_s,
4681 op->lrp_networks.ea_s,
4682 op->json_key);
4683
4684 /* The priority here is calculated to implement longest-prefix-match
4685 * routing. */
4686 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
4687 ds_cstr(&match), ds_cstr(&actions));
4688 ds_destroy(&match);
4689 ds_destroy(&actions);
4690 }
4691
4692 static void
4693 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
4694 struct hmap *ports,
4695 const struct nbrec_logical_router_static_route *route)
4696 {
4697 ovs_be32 nexthop;
4698 const char *lrp_addr_s = NULL;
4699 unsigned int plen;
4700 bool is_ipv4;
4701
4702 /* Verify that the next hop is an IP address with an all-ones mask. */
4703 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
4704 if (!error) {
4705 if (plen != 32) {
4706 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4707 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4708 return;
4709 }
4710 is_ipv4 = true;
4711 } else {
4712 free(error);
4713
4714 struct in6_addr ip6;
4715 error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
4716 if (!error) {
4717 if (plen != 128) {
4718 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4719 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4720 return;
4721 }
4722 is_ipv4 = false;
4723 } else {
4724 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4725 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
4726 free(error);
4727 return;
4728 }
4729 }
4730
4731 char *prefix_s;
4732 if (is_ipv4) {
4733 ovs_be32 prefix;
4734 /* Verify that ip prefix is a valid IPv4 address. */
4735 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
4736 if (error) {
4737 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4738 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4739 route->ip_prefix);
4740 free(error);
4741 return;
4742 }
4743 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
4744 } else {
4745 /* Verify that ip prefix is a valid IPv6 address. */
4746 struct in6_addr prefix;
4747 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
4748 if (error) {
4749 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4750 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4751 route->ip_prefix);
4752 free(error);
4753 return;
4754 }
4755 struct in6_addr mask = ipv6_create_mask(plen);
4756 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
4757 prefix_s = xmalloc(INET6_ADDRSTRLEN);
4758 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
4759 }
4760
4761 /* Find the outgoing port. */
4762 struct ovn_port *out_port = NULL;
4763 if (route->output_port) {
4764 out_port = ovn_port_find(ports, route->output_port);
4765 if (!out_port) {
4766 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4767 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
4768 route->output_port, route->ip_prefix);
4769 goto free_prefix_s;
4770 }
4771 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4772 if (!lrp_addr_s) {
4773 /* There are no IP networks configured on the router's port via
4774 * which 'route->nexthop' is theoretically reachable. But since
4775 * 'out_port' has been specified, we honor it by trying to reach
4776 * 'route->nexthop' via the first IP address of 'out_port'.
4777 * (There are cases, e.g in GCE, where each VM gets a /32 IP
4778 * address and the default gateway is still reachable from it.) */
4779 if (is_ipv4) {
4780 if (out_port->lrp_networks.n_ipv4_addrs) {
4781 lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
4782 }
4783 } else {
4784 if (out_port->lrp_networks.n_ipv6_addrs) {
4785 lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
4786 }
4787 }
4788 }
4789 } else {
4790 /* output_port is not specified, find the
4791 * router port matching the next hop. */
4792 int i;
4793 for (i = 0; i < od->nbr->n_ports; i++) {
4794 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
4795 out_port = ovn_port_find(ports, lrp->name);
4796 if (!out_port) {
4797 /* This should not happen. */
4798 continue;
4799 }
4800
4801 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4802 if (lrp_addr_s) {
4803 break;
4804 }
4805 }
4806 }
4807
4808 if (!out_port || !lrp_addr_s) {
4809 /* There is no matched out port. */
4810 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4811 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
4812 route->ip_prefix, route->nexthop);
4813 goto free_prefix_s;
4814 }
4815
4816 char *policy = route->policy ? route->policy : "dst-ip";
4817 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
4818 policy);
4819
4820 free_prefix_s:
4821 free(prefix_s);
4822 }
4823
4824 static void
4825 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4826 {
4827 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
4828 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
4829 return;
4830 }
4831
4832 ds_put_cstr(ds, "{");
4833 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4834 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
4835 if (add_bcast) {
4836 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4837 }
4838 }
4839 ds_chomp(ds, ' ');
4840 ds_chomp(ds, ',');
4841 ds_put_cstr(ds, "}");
4842 }
4843
4844 static void
4845 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4846 {
4847 if (op->lrp_networks.n_ipv6_addrs == 1) {
4848 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4849 return;
4850 }
4851
4852 ds_put_cstr(ds, "{");
4853 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4854 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4855 }
4856 ds_chomp(ds, ' ');
4857 ds_chomp(ds, ',');
4858 ds_put_cstr(ds, "}");
4859 }
4860
4861 static const char *
4862 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
4863 {
4864 char *key = xasprintf("%s_force_snat_ip", key_type);
4865 const char *ip_address = smap_get(&od->nbr->options, key);
4866 free(key);
4867
4868 if (ip_address) {
4869 ovs_be32 mask;
4870 char *error = ip_parse_masked(ip_address, ip, &mask);
4871 if (error || mask != OVS_BE32_MAX) {
4872 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4873 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
4874 ip_address, UUID_ARGS(&od->key));
4875 free(error);
4876 *ip = 0;
4877 return NULL;
4878 }
4879 return ip_address;
4880 }
4881
4882 *ip = 0;
4883 return NULL;
4884 }
4885
4886 static void
4887 add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
4888 struct ds *match, struct ds *actions, int priority,
4889 const char *lb_force_snat_ip, char *backend_ips,
4890 bool is_udp, int addr_family)
4891 {
4892 /* A match and actions for new connections. */
4893 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
4894 if (lb_force_snat_ip) {
4895 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
4896 ds_cstr(actions));
4897 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4898 new_actions);
4899 free(new_actions);
4900 } else {
4901 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4902 ds_cstr(actions));
4903 }
4904
4905 /* A match and actions for established connections. */
4906 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
4907 if (lb_force_snat_ip) {
4908 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4909 "flags.force_snat_for_lb = 1; ct_dnat;");
4910 } else {
4911 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4912 "ct_dnat;");
4913 }
4914
4915 free(new_match);
4916 free(est_match);
4917
4918 if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips) {
4919 return;
4920 }
4921
4922 /* Add logical flows to UNDNAT the load balanced reverse traffic in
4923 * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical
4924 * router has a gateway router port associated.
4925 */
4926 struct ds undnat_match = DS_EMPTY_INITIALIZER;
4927 if (addr_family == AF_INET) {
4928 ds_put_cstr(&undnat_match, "ip4 && (");
4929 } else {
4930 ds_put_cstr(&undnat_match, "ip6 && (");
4931 }
4932 char *start, *next, *ip_str;
4933 start = next = xstrdup(backend_ips);
4934 ip_str = strsep(&next, ",");
4935 bool backend_ips_found = false;
4936 while (ip_str && ip_str[0]) {
4937 char *ip_address = NULL;
4938 uint16_t port = 0;
4939 int addr_family_;
4940 ip_address_and_port_from_lb_key(ip_str, &ip_address, &port,
4941 &addr_family_);
4942 if (!ip_address) {
4943 break;
4944 }
4945
4946 if (addr_family_ == AF_INET) {
4947 ds_put_format(&undnat_match, "(ip4.src == %s", ip_address);
4948 } else {
4949 ds_put_format(&undnat_match, "(ip6.src == %s", ip_address);
4950 }
4951 free(ip_address);
4952 if (port) {
4953 ds_put_format(&undnat_match, " && %s.src == %d) || ",
4954 is_udp ? "udp" : "tcp", port);
4955 } else {
4956 ds_put_cstr(&undnat_match, ") || ");
4957 }
4958 ip_str = strsep(&next, ",");
4959 backend_ips_found = true;
4960 }
4961
4962 free(start);
4963 if (!backend_ips_found) {
4964 ds_destroy(&undnat_match);
4965 return;
4966 }
4967 ds_chomp(&undnat_match, ' ');
4968 ds_chomp(&undnat_match, '|');
4969 ds_chomp(&undnat_match, '|');
4970 ds_chomp(&undnat_match, ' ');
4971 ds_put_format(&undnat_match, ") && outport == %s && "
4972 "is_chassis_resident(%s)", od->l3dgw_port->json_key,
4973 od->l3redirect_port->json_key);
4974 if (lb_force_snat_ip) {
4975 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
4976 ds_cstr(&undnat_match),
4977 "flags.force_snat_for_lb = 1; ct_dnat;");
4978 } else {
4979 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
4980 ds_cstr(&undnat_match), "ct_dnat;");
4981 }
4982
4983 ds_destroy(&undnat_match);
4984 }
4985
4986 #define ND_RA_MAX_INTERVAL_MAX 1800
4987 #define ND_RA_MAX_INTERVAL_MIN 4
4988
4989 #define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
4990 #define ND_RA_MIN_INTERVAL_MIN 3
4991
4992 static void
4993 copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
4994 {
4995 struct smap options;
4996 smap_clone(&options, &op->sb->options);
4997
4998 smap_add(&options, "ipv6_ra_send_periodic", "true");
4999 smap_add(&options, "ipv6_ra_address_mode", address_mode);
5000
5001 int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
5002 "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
5003 if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
5004 max_interval = ND_RA_MAX_INTERVAL_MAX;
5005 }
5006 if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
5007 max_interval = ND_RA_MAX_INTERVAL_MIN;
5008 }
5009 smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval);
5010
5011 int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
5012 "min_interval", nd_ra_min_interval_default(max_interval));
5013 if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
5014 min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
5015 }
5016 if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
5017 min_interval = ND_RA_MIN_INTERVAL_MIN;
5018 }
5019 smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval);
5020
5021 int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", ND_MTU_DEFAULT);
5022 /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */
5023 if (mtu && mtu >= 1280) {
5024 smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
5025 }
5026
5027 struct ds s = DS_EMPTY_INITIALIZER;
5028 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
5029 struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
5030 if (in6_is_lla(&addrs->network)) {
5031 smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
5032 continue;
5033 }
5034 ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
5035 }
5036 /* Remove trailing space */
5037 ds_chomp(&s, ' ');
5038 smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
5039 ds_destroy(&s);
5040
5041 smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
5042
5043 sbrec_port_binding_set_options(op->sb, &options);
5044 smap_destroy(&options);
5045 }
5046
5047 static void
5048 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
5049 struct hmap *lflows)
5050 {
5051 /* This flow table structure is documented in ovn-northd(8), so please
5052 * update ovn-northd.8.xml if you change anything. */
5053
5054 struct ds match = DS_EMPTY_INITIALIZER;
5055 struct ds actions = DS_EMPTY_INITIALIZER;
5056
5057 /* Logical router ingress table 0: Admission control framework. */
5058 struct ovn_datapath *od;
5059 HMAP_FOR_EACH (od, key_node, datapaths) {
5060 if (!od->nbr) {
5061 continue;
5062 }
5063
5064 /* Logical VLANs not supported.
5065 * Broadcast/multicast source address is invalid. */
5066 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
5067 "vlan.present || eth.src[40]", "drop;");
5068 }
5069
5070 /* Logical router ingress table 0: match (priority 50). */
5071 struct ovn_port *op;
5072 HMAP_FOR_EACH (op, key_node, ports) {
5073 if (!op->nbrp) {
5074 continue;
5075 }
5076
5077 if (!lrport_is_enabled(op->nbrp)) {
5078 /* Drop packets from disabled logical ports (since logical flow
5079 * tables are default-drop). */
5080 continue;
5081 }
5082
5083 if (op->derived) {
5084 /* No ingress packets should be received on a chassisredirect
5085 * port. */
5086 continue;
5087 }
5088
5089 ds_clear(&match);
5090 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
5091 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
5092 ds_cstr(&match), "next;");
5093
5094 ds_clear(&match);
5095 ds_put_format(&match, "eth.dst == %s && inport == %s",
5096 op->lrp_networks.ea_s, op->json_key);
5097 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5098 && op->od->l3redirect_port) {
5099 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
5100 * should only be received on the "redirect-chassis". */
5101 ds_put_format(&match, " && is_chassis_resident(%s)",
5102 op->od->l3redirect_port->json_key);
5103 }
5104 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
5105 ds_cstr(&match), "next;");
5106 }
5107
5108 /* Logical router ingress table 1: IP Input. */
5109 HMAP_FOR_EACH (od, key_node, datapaths) {
5110 if (!od->nbr) {
5111 continue;
5112 }
5113
5114 /* L3 admission control: drop multicast and broadcast source, localhost
5115 * source or destination, and zero network source or destination
5116 * (priority 100). */
5117 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
5118 "ip4.mcast || "
5119 "ip4.src == 255.255.255.255 || "
5120 "ip4.src == 127.0.0.0/8 || "
5121 "ip4.dst == 127.0.0.0/8 || "
5122 "ip4.src == 0.0.0.0/8 || "
5123 "ip4.dst == 0.0.0.0/8",
5124 "drop;");
5125
5126 /* ARP reply handling. Use ARP replies to populate the logical
5127 * router's ARP table. */
5128 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
5129 "put_arp(inport, arp.spa, arp.sha);");
5130
5131 /* Drop Ethernet local broadcast. By definition this traffic should
5132 * not be forwarded.*/
5133 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
5134 "eth.bcast", "drop;");
5135
5136 /* TTL discard */
5137 ds_clear(&match);
5138 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
5139 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
5140 ds_cstr(&match), "drop;");
5141
5142 /* ND advertisement handling. Use advertisements to populate
5143 * the logical router's ARP/ND table. */
5144 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
5145 "put_nd(inport, nd.target, nd.tll);");
5146
5147 /* Learn from neighbor solicitations that were not directed at
5148 * us. (A priority-90 flow will respond to requests to us and
5149 * learn the sender's mac address.) */
5150 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
5151 "put_nd(inport, ip6.src, nd.sll);");
5152
5153 /* Pass other traffic not already handled to the next table for
5154 * routing. */
5155 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
5156 }
5157
5158 /* Logical router ingress table 1: IP Input for IPv4. */
5159 HMAP_FOR_EACH (op, key_node, ports) {
5160 if (!op->nbrp) {
5161 continue;
5162 }
5163
5164 if (op->derived) {
5165 /* No ingress packets are accepted on a chassisredirect
5166 * port, so no need to program flows for that port. */
5167 continue;
5168 }
5169
5170 if (op->lrp_networks.n_ipv4_addrs) {
5171 /* L3 admission control: drop packets that originate from an
5172 * IPv4 address owned by the router or a broadcast address
5173 * known to the router (priority 100). */
5174 ds_clear(&match);
5175 ds_put_cstr(&match, "ip4.src == ");
5176 op_put_v4_networks(&match, op, true);
5177 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
5178 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5179 ds_cstr(&match), "drop;");
5180
5181 /* ICMP echo reply. These flows reply to ICMP echo requests
5182 * received for the router's IP address. Since packets only
5183 * get here as part of the logical router datapath, the inport
5184 * (i.e. the incoming locally attached net) does not matter.
5185 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
5186 ds_clear(&match);
5187 ds_put_cstr(&match, "ip4.dst == ");
5188 op_put_v4_networks(&match, op, false);
5189 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
5190
5191 ds_clear(&actions);
5192 ds_put_format(&actions,
5193 "ip4.dst <-> ip4.src; "
5194 "ip.ttl = 255; "
5195 "icmp4.type = 0; "
5196 "flags.loopback = 1; "
5197 "next; ");
5198 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5199 ds_cstr(&match), ds_cstr(&actions));
5200 }
5201
5202 /* ICMP time exceeded */
5203 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5204 ds_clear(&match);
5205 ds_clear(&actions);
5206
5207 ds_put_format(&match,
5208 "inport == %s && ip4 && "
5209 "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
5210 ds_put_format(&actions,
5211 "icmp4 {"
5212 "eth.dst <-> eth.src; "
5213 "icmp4.type = 11; /* Time exceeded */ "
5214 "icmp4.code = 0; /* TTL exceeded in transit */ "
5215 "ip4.dst = ip4.src; "
5216 "ip4.src = %s; "
5217 "ip.ttl = 255; "
5218 "next; };",
5219 op->lrp_networks.ipv4_addrs[i].addr_s);
5220 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5221 ds_cstr(&match), ds_cstr(&actions));
5222 }
5223
5224 /* ARP reply. These flows reply to ARP requests for the router's own
5225 * IP address. */
5226 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5227 ds_clear(&match);
5228 ds_put_format(&match,
5229 "inport == %s && arp.spa == %s/%u && arp.tpa == %s"
5230 " && arp.op == 1",
5231 op->json_key,
5232 op->lrp_networks.ipv4_addrs[i].network_s,
5233 op->lrp_networks.ipv4_addrs[i].plen,
5234 op->lrp_networks.ipv4_addrs[i].addr_s);
5235 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5236 && op->od->l3redirect_port) {
5237 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5238 * should only be sent from the "redirect-chassis", so that
5239 * upstream MAC learning points to the "redirect-chassis".
5240 * Also need to avoid generation of multiple ARP responses
5241 * from different chassis. */
5242 ds_put_format(&match, " && is_chassis_resident(%s)",
5243 op->od->l3redirect_port->json_key);
5244 }
5245
5246 ds_clear(&actions);
5247 ds_put_format(&actions,
5248 "put_arp(inport, arp.spa, arp.sha); "
5249 "eth.dst = eth.src; "
5250 "eth.src = %s; "
5251 "arp.op = 2; /* ARP reply */ "
5252 "arp.tha = arp.sha; "
5253 "arp.sha = %s; "
5254 "arp.tpa = arp.spa; "
5255 "arp.spa = %s; "
5256 "outport = %s; "
5257 "flags.loopback = 1; "
5258 "output;",
5259 op->lrp_networks.ea_s,
5260 op->lrp_networks.ea_s,
5261 op->lrp_networks.ipv4_addrs[i].addr_s,
5262 op->json_key);
5263 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5264 ds_cstr(&match), ds_cstr(&actions));
5265 }
5266
5267 /* Learn from ARP requests that were not directed at us. A typical
5268 * use case is GARP request handling. (A priority-90 flow will
5269 * respond to request to us and learn the sender's mac address.) */
5270 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5271 ds_clear(&match);
5272 ds_put_format(&match,
5273 "inport == %s && arp.spa == %s/%u && arp.op == 1",
5274 op->json_key,
5275 op->lrp_networks.ipv4_addrs[i].network_s,
5276 op->lrp_networks.ipv4_addrs[i].plen);
5277 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5278 && op->od->l3redirect_port) {
5279 ds_put_format(&match, " && is_chassis_resident(%s)",
5280 op->od->l3redirect_port->json_key);
5281 }
5282 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5283 ds_cstr(&match),
5284 "put_arp(inport, arp.spa, arp.sha);");
5285
5286 }
5287
5288 /* A set to hold all load-balancer vips that need ARP responses. */
5289 struct sset all_ips = SSET_INITIALIZER(&all_ips);
5290 int addr_family;
5291 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
5292
5293 const char *ip_address;
5294 SSET_FOR_EACH(ip_address, &all_ips) {
5295 ds_clear(&match);
5296 if (addr_family == AF_INET) {
5297 ds_put_format(&match,
5298 "inport == %s && arp.tpa == %s && arp.op == 1",
5299 op->json_key, ip_address);
5300 } else {
5301 ds_put_format(&match,
5302 "inport == %s && nd_ns && nd.target == %s",
5303 op->json_key, ip_address);
5304 }
5305
5306 ds_clear(&actions);
5307 if (addr_family == AF_INET) {
5308 ds_put_format(&actions,
5309 "eth.dst = eth.src; "
5310 "eth.src = %s; "
5311 "arp.op = 2; /* ARP reply */ "
5312 "arp.tha = arp.sha; "
5313 "arp.sha = %s; "
5314 "arp.tpa = arp.spa; "
5315 "arp.spa = %s; "
5316 "outport = %s; "
5317 "flags.loopback = 1; "
5318 "output;",
5319 op->lrp_networks.ea_s,
5320 op->lrp_networks.ea_s,
5321 ip_address,
5322 op->json_key);
5323 } else {
5324 ds_put_format(&actions,
5325 "nd_na { "
5326 "eth.src = %s; "
5327 "ip6.src = %s; "
5328 "nd.target = %s; "
5329 "nd.tll = %s; "
5330 "outport = inport; "
5331 "flags.loopback = 1; "
5332 "output; "
5333 "};",
5334 op->lrp_networks.ea_s,
5335 ip_address,
5336 ip_address,
5337 op->lrp_networks.ea_s);
5338 }
5339 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5340 ds_cstr(&match), ds_cstr(&actions));
5341 }
5342
5343 sset_destroy(&all_ips);
5344
5345 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
5346 * LBed traffic respectively to be SNATed. In addition, there can be
5347 * a number of SNAT rules in the NAT table. */
5348 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
5349 (op->od->nbr->n_nat + 2));
5350 size_t n_snat_ips = 0;
5351
5352 ovs_be32 snat_ip;
5353 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
5354 &snat_ip);
5355 if (dnat_force_snat_ip) {
5356 snat_ips[n_snat_ips++] = snat_ip;
5357 }
5358
5359 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
5360 &snat_ip);
5361 if (lb_force_snat_ip) {
5362 snat_ips[n_snat_ips++] = snat_ip;
5363 }
5364
5365 for (int i = 0; i < op->od->nbr->n_nat; i++) {
5366 const struct nbrec_nat *nat;
5367
5368 nat = op->od->nbr->nat[i];
5369
5370 ovs_be32 ip;
5371 if (!ip_parse(nat->external_ip, &ip) || !ip) {
5372 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5373 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
5374 "for router %s", nat->external_ip, op->key);
5375 continue;
5376 }
5377
5378 if (!strcmp(nat->type, "snat")) {
5379 snat_ips[n_snat_ips++] = ip;
5380 continue;
5381 }
5382
5383 /* ARP handling for external IP addresses.
5384 *
5385 * DNAT IP addresses are external IP addresses that need ARP
5386 * handling. */
5387 ds_clear(&match);
5388 ds_put_format(&match,
5389 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
5390 op->json_key, IP_ARGS(ip));
5391
5392 ds_clear(&actions);
5393 ds_put_format(&actions,
5394 "eth.dst = eth.src; "
5395 "arp.op = 2; /* ARP reply */ "
5396 "arp.tha = arp.sha; ");
5397
5398 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
5399 struct eth_addr mac;
5400 if (nat->external_mac &&
5401 eth_addr_from_string(nat->external_mac, &mac)
5402 && nat->logical_port) {
5403 /* distributed NAT case, use nat->external_mac */
5404 ds_put_format(&actions,
5405 "eth.src = "ETH_ADDR_FMT"; "
5406 "arp.sha = "ETH_ADDR_FMT"; ",
5407 ETH_ADDR_ARGS(mac),
5408 ETH_ADDR_ARGS(mac));
5409 /* Traffic with eth.src = nat->external_mac should only be
5410 * sent from the chassis where nat->logical_port is
5411 * resident, so that upstream MAC learning points to the
5412 * correct chassis. Also need to avoid generation of
5413 * multiple ARP responses from different chassis. */
5414 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
5415 nat->logical_port);
5416 } else {
5417 ds_put_format(&actions,
5418 "eth.src = %s; "
5419 "arp.sha = %s; ",
5420 op->lrp_networks.ea_s,
5421 op->lrp_networks.ea_s);
5422 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5423 * should only be sent from the "redirect-chassis", so that
5424 * upstream MAC learning points to the "redirect-chassis".
5425 * Also need to avoid generation of multiple ARP responses
5426 * from different chassis. */
5427 if (op->od->l3redirect_port) {
5428 ds_put_format(&match, " && is_chassis_resident(%s)",
5429 op->od->l3redirect_port->json_key);
5430 }
5431 }
5432 } else {
5433 ds_put_format(&actions,
5434 "eth.src = %s; "
5435 "arp.sha = %s; ",
5436 op->lrp_networks.ea_s,
5437 op->lrp_networks.ea_s);
5438 }
5439 ds_put_format(&actions,
5440 "arp.tpa = arp.spa; "
5441 "arp.spa = "IP_FMT"; "
5442 "outport = %s; "
5443 "flags.loopback = 1; "
5444 "output;",
5445 IP_ARGS(ip),
5446 op->json_key);
5447 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5448 ds_cstr(&match), ds_cstr(&actions));
5449 }
5450
5451 if (!smap_get(&op->od->nbr->options, "chassis")
5452 && !op->od->l3dgw_port) {
5453 /* UDP/TCP port unreachable. */
5454 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5455 ds_clear(&match);
5456 ds_put_format(&match,
5457 "ip4 && ip4.dst == %s && !ip.later_frag && udp",
5458 op->lrp_networks.ipv4_addrs[i].addr_s);
5459 const char *action = "icmp4 {"
5460 "eth.dst <-> eth.src; "
5461 "ip4.dst <-> ip4.src; "
5462 "ip.ttl = 255; "
5463 "icmp4.type = 3; "
5464 "icmp4.code = 3; "
5465 "next; };";
5466 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5467 ds_cstr(&match), action);
5468
5469 ds_clear(&match);
5470 ds_put_format(&match,
5471 "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
5472 op->lrp_networks.ipv4_addrs[i].addr_s);
5473 action = "tcp_reset {"
5474 "eth.dst <-> eth.src; "
5475 "ip4.dst <-> ip4.src; "
5476 "next; };";
5477 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5478 ds_cstr(&match), action);
5479
5480 ds_clear(&match);
5481 ds_put_format(&match,
5482 "ip4 && ip4.dst == %s && !ip.later_frag",
5483 op->lrp_networks.ipv4_addrs[i].addr_s);
5484 action = "icmp4 {"
5485 "eth.dst <-> eth.src; "
5486 "ip4.dst <-> ip4.src; "
5487 "ip.ttl = 255; "
5488 "icmp4.type = 3; "
5489 "icmp4.code = 2; "
5490 "next; };";
5491 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5492 ds_cstr(&match), action);
5493 }
5494 }
5495
5496 ds_clear(&match);
5497 ds_put_cstr(&match, "ip4.dst == {");
5498 bool has_drop_ips = false;
5499 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5500 bool snat_ip_is_router_ip = false;
5501 for (int j = 0; j < n_snat_ips; j++) {
5502 /* Packets to SNAT IPs should not be dropped. */
5503 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
5504 snat_ip_is_router_ip = true;
5505 break;
5506 }
5507 }
5508 if (snat_ip_is_router_ip) {
5509 continue;
5510 }
5511 ds_put_format(&match, "%s, ",
5512 op->lrp_networks.ipv4_addrs[i].addr_s);
5513 has_drop_ips = true;
5514 }
5515 ds_chomp(&match, ' ');
5516 ds_chomp(&match, ',');
5517 ds_put_cstr(&match, "}");
5518
5519 if (has_drop_ips) {
5520 /* Drop IP traffic to this router. */
5521 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5522 ds_cstr(&match), "drop;");
5523 }
5524
5525 free(snat_ips);
5526 }
5527
5528 /* Logical router ingress table 1: IP Input for IPv6. */
5529 HMAP_FOR_EACH (op, key_node, ports) {
5530 if (!op->nbrp) {
5531 continue;
5532 }
5533
5534 if (op->derived) {
5535 /* No ingress packets are accepted on a chassisredirect
5536 * port, so no need to program flows for that port. */
5537 continue;
5538 }
5539
5540 if (op->lrp_networks.n_ipv6_addrs) {
5541 /* L3 admission control: drop packets that originate from an
5542 * IPv6 address owned by the router (priority 100). */
5543 ds_clear(&match);
5544 ds_put_cstr(&match, "ip6.src == ");
5545 op_put_v6_networks(&match, op);
5546 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5547 ds_cstr(&match), "drop;");
5548
5549 /* ICMPv6 echo reply. These flows reply to echo requests
5550 * received for the router's IP address. */
5551 ds_clear(&match);
5552 ds_put_cstr(&match, "ip6.dst == ");
5553 op_put_v6_networks(&match, op);
5554 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
5555
5556 ds_clear(&actions);
5557 ds_put_cstr(&actions,
5558 "ip6.dst <-> ip6.src; "
5559 "ip.ttl = 255; "
5560 "icmp6.type = 129; "
5561 "flags.loopback = 1; "
5562 "next; ");
5563 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5564 ds_cstr(&match), ds_cstr(&actions));
5565
5566 /* Drop IPv6 traffic to this router. */
5567 ds_clear(&match);
5568 ds_put_cstr(&match, "ip6.dst == ");
5569 op_put_v6_networks(&match, op);
5570 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5571 ds_cstr(&match), "drop;");
5572 }
5573
5574 /* ND reply. These flows reply to ND solicitations for the
5575 * router's own IP address. */
5576 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5577 ds_clear(&match);
5578 ds_put_format(&match,
5579 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
5580 "&& nd.target == %s",
5581 op->json_key,
5582 op->lrp_networks.ipv6_addrs[i].addr_s,
5583 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
5584 op->lrp_networks.ipv6_addrs[i].addr_s);
5585 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5586 && op->od->l3redirect_port) {
5587 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5588 * should only be sent from the "redirect-chassis", so that
5589 * upstream MAC learning points to the "redirect-chassis".
5590 * Also need to avoid generation of multiple ND replies
5591 * from different chassis. */
5592 ds_put_format(&match, " && is_chassis_resident(%s)",
5593 op->od->l3redirect_port->json_key);
5594 }
5595
5596 ds_clear(&actions);
5597 ds_put_format(&actions,
5598 "put_nd(inport, ip6.src, nd.sll); "
5599 "nd_na_router { "
5600 "eth.src = %s; "
5601 "ip6.src = %s; "
5602 "nd.target = %s; "
5603 "nd.tll = %s; "
5604 "outport = inport; "
5605 "flags.loopback = 1; "
5606 "output; "
5607 "};",
5608 op->lrp_networks.ea_s,
5609 op->lrp_networks.ipv6_addrs[i].addr_s,
5610 op->lrp_networks.ipv6_addrs[i].addr_s,
5611 op->lrp_networks.ea_s);
5612 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5613 ds_cstr(&match), ds_cstr(&actions));
5614 }
5615
5616 /* UDP/TCP port unreachable */
5617 if (!smap_get(&op->od->nbr->options, "chassis")
5618 && !op->od->l3dgw_port) {
5619 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5620 ds_clear(&match);
5621 ds_put_format(&match,
5622 "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
5623 op->lrp_networks.ipv6_addrs[i].addr_s);
5624 const char *action = "tcp_reset {"
5625 "eth.dst <-> eth.src; "
5626 "ip6.dst <-> ip6.src; "
5627 "next; };";
5628 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5629 ds_cstr(&match), action);
5630
5631 ds_clear(&match);
5632 ds_put_format(&match,
5633 "ip6 && ip6.dst == %s && !ip.later_frag && udp",
5634 op->lrp_networks.ipv6_addrs[i].addr_s);
5635 action = "icmp6 {"
5636 "eth.dst <-> eth.src; "
5637 "ip6.dst <-> ip6.src; "
5638 "ip.ttl = 255; "
5639 "icmp6.type = 1; "
5640 "icmp6.code = 4; "
5641 "next; };";
5642 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5643 ds_cstr(&match), action);
5644
5645 ds_clear(&match);
5646 ds_put_format(&match,
5647 "ip6 && ip6.dst == %s && !ip.later_frag",
5648 op->lrp_networks.ipv6_addrs[i].addr_s);
5649 action = "icmp6 {"
5650 "eth.dst <-> eth.src; "
5651 "ip6.dst <-> ip6.src; "
5652 "ip.ttl = 255; "
5653 "icmp6.type = 1; "
5654 "icmp6.code = 3; "
5655 "next; };";
5656 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5657 ds_cstr(&match), action);
5658 }
5659 }
5660
5661 /* ICMPv6 time exceeded */
5662 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5663 /* skip link-local address */
5664 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5665 continue;
5666 }
5667
5668 ds_clear(&match);
5669 ds_clear(&actions);
5670
5671 ds_put_format(&match,
5672 "inport == %s && ip6 && "
5673 "ip6.src == %s/%d && "
5674 "ip.ttl == {0, 1} && !ip.later_frag",
5675 op->json_key,
5676 op->lrp_networks.ipv6_addrs[i].network_s,
5677 op->lrp_networks.ipv6_addrs[i].plen);
5678 ds_put_format(&actions,
5679 "icmp6 {"
5680 "eth.dst <-> eth.src; "
5681 "ip6.dst = ip6.src; "
5682 "ip6.src = %s; "
5683 "ip.ttl = 255; "
5684 "icmp6.type = 3; /* Time exceeded */ "
5685 "icmp6.code = 0; /* TTL exceeded in transit */ "
5686 "next; };",
5687 op->lrp_networks.ipv6_addrs[i].addr_s);
5688 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5689 ds_cstr(&match), ds_cstr(&actions));
5690 }
5691 }
5692
5693 /* NAT, Defrag and load balancing. */
5694 HMAP_FOR_EACH (od, key_node, datapaths) {
5695 if (!od->nbr) {
5696 continue;
5697 }
5698
5699 /* Packets are allowed by default. */
5700 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
5701 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
5702 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
5703 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
5704 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
5705 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
5706
5707 /* NAT rules are only valid on Gateway routers and routers with
5708 * l3dgw_port (router has a port with "redirect-chassis"
5709 * specified). */
5710 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
5711 continue;
5712 }
5713
5714 ovs_be32 snat_ip;
5715 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
5716 &snat_ip);
5717 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
5718 &snat_ip);
5719
5720 for (int i = 0; i < od->nbr->n_nat; i++) {
5721 const struct nbrec_nat *nat;
5722
5723 nat = od->nbr->nat[i];
5724
5725 ovs_be32 ip, mask;
5726
5727 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
5728 if (error || mask != OVS_BE32_MAX) {
5729 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5730 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
5731 nat->external_ip);
5732 free(error);
5733 continue;
5734 }
5735
5736 /* Check the validity of nat->logical_ip. 'logical_ip' can
5737 * be a subnet when the type is "snat". */
5738 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
5739 if (!strcmp(nat->type, "snat")) {
5740 if (error) {
5741 static struct vlog_rate_limit rl =
5742 VLOG_RATE_LIMIT_INIT(5, 1);
5743 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
5744 "in router "UUID_FMT"",
5745 nat->logical_ip, UUID_ARGS(&od->key));
5746 free(error);
5747 continue;
5748 }
5749 } else {
5750 if (error || mask != OVS_BE32_MAX) {
5751 static struct vlog_rate_limit rl =
5752 VLOG_RATE_LIMIT_INIT(5, 1);
5753 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
5754 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
5755 free(error);
5756 continue;
5757 }
5758 }
5759
5760 /* For distributed router NAT, determine whether this NAT rule
5761 * satisfies the conditions for distributed NAT processing. */
5762 bool distributed = false;
5763 struct eth_addr mac;
5764 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
5765 nat->logical_port && nat->external_mac) {
5766 if (eth_addr_from_string(nat->external_mac, &mac)) {
5767 distributed = true;
5768 } else {
5769 static struct vlog_rate_limit rl =
5770 VLOG_RATE_LIMIT_INIT(5, 1);
5771 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
5772 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
5773 continue;
5774 }
5775 }
5776
5777 /* Ingress UNSNAT table: It is for already established connections'
5778 * reverse traffic. i.e., SNAT has already been done in egress
5779 * pipeline and now the packet has entered the ingress pipeline as
5780 * part of a reply. We undo the SNAT here.
5781 *
5782 * Undoing SNAT has to happen before DNAT processing. This is
5783 * because when the packet was DNATed in ingress pipeline, it did
5784 * not know about the possibility of eventual additional SNAT in
5785 * egress pipeline. */
5786 if (!strcmp(nat->type, "snat")
5787 || !strcmp(nat->type, "dnat_and_snat")) {
5788 if (!od->l3dgw_port) {
5789 /* Gateway router. */
5790 ds_clear(&match);
5791 ds_put_format(&match, "ip && ip4.dst == %s",
5792 nat->external_ip);
5793 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
5794 ds_cstr(&match), "ct_snat;");
5795 } else {
5796 /* Distributed router. */
5797
5798 /* Traffic received on l3dgw_port is subject to NAT. */
5799 ds_clear(&match);
5800 ds_put_format(&match, "ip && ip4.dst == %s"
5801 " && inport == %s",
5802 nat->external_ip,
5803 od->l3dgw_port->json_key);
5804 if (!distributed && od->l3redirect_port) {
5805 /* Flows for NAT rules that are centralized are only
5806 * programmed on the "redirect-chassis". */
5807 ds_put_format(&match, " && is_chassis_resident(%s)",
5808 od->l3redirect_port->json_key);
5809 }
5810 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5811 ds_cstr(&match), "ct_snat;");
5812
5813 /* Traffic received on other router ports must be
5814 * redirected to the central instance of the l3dgw_port
5815 * for NAT processing. */
5816 ds_clear(&match);
5817 ds_put_format(&match, "ip && ip4.dst == %s",
5818 nat->external_ip);
5819 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
5820 ds_cstr(&match),
5821 REGBIT_NAT_REDIRECT" = 1; next;");
5822 }
5823 }
5824
5825 /* Ingress DNAT table: Packets enter the pipeline with destination
5826 * IP address that needs to be DNATted from an external IP address
5827 * to a logical IP address. */
5828 if (!strcmp(nat->type, "dnat")
5829 || !strcmp(nat->type, "dnat_and_snat")) {
5830 if (!od->l3dgw_port) {
5831 /* Gateway router. */
5832 /* Packet when it goes from the initiator to destination.
5833 * We need to set flags.loopback because the router can
5834 * send the packet back through the same interface. */
5835 ds_clear(&match);
5836 ds_put_format(&match, "ip && ip4.dst == %s",
5837 nat->external_ip);
5838 ds_clear(&actions);
5839 if (dnat_force_snat_ip) {
5840 /* Indicate to the future tables that a DNAT has taken
5841 * place and a force SNAT needs to be done in the
5842 * Egress SNAT table. */
5843 ds_put_format(&actions,
5844 "flags.force_snat_for_dnat = 1; ");
5845 }
5846 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
5847 nat->logical_ip);
5848 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5849 ds_cstr(&match), ds_cstr(&actions));
5850 } else {
5851 /* Distributed router. */
5852
5853 /* Traffic received on l3dgw_port is subject to NAT. */
5854 ds_clear(&match);
5855 ds_put_format(&match, "ip && ip4.dst == %s"
5856 " && inport == %s",
5857 nat->external_ip,
5858 od->l3dgw_port->json_key);
5859 if (!distributed && od->l3redirect_port) {
5860 /* Flows for NAT rules that are centralized are only
5861 * programmed on the "redirect-chassis". */
5862 ds_put_format(&match, " && is_chassis_resident(%s)",
5863 od->l3redirect_port->json_key);
5864 }
5865 ds_clear(&actions);
5866 ds_put_format(&actions, "ct_dnat(%s);",
5867 nat->logical_ip);
5868 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5869 ds_cstr(&match), ds_cstr(&actions));
5870
5871 /* Traffic received on other router ports must be
5872 * redirected to the central instance of the l3dgw_port
5873 * for NAT processing. */
5874 ds_clear(&match);
5875 ds_put_format(&match, "ip && ip4.dst == %s",
5876 nat->external_ip);
5877 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5878 ds_cstr(&match),
5879 REGBIT_NAT_REDIRECT" = 1; next;");
5880 }
5881 }
5882
5883 /* Egress UNDNAT table: It is for already established connections'
5884 * reverse traffic. i.e., DNAT has already been done in ingress
5885 * pipeline and now the packet has entered the egress pipeline as
5886 * part of a reply. We undo the DNAT here.
5887 *
5888 * Note that this only applies for NAT on a distributed router.
5889 * Undo DNAT on a gateway router is done in the ingress DNAT
5890 * pipeline stage. */
5891 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
5892 || !strcmp(nat->type, "dnat_and_snat"))) {
5893 ds_clear(&match);
5894 ds_put_format(&match, "ip && ip4.src == %s"
5895 " && outport == %s",
5896 nat->logical_ip,
5897 od->l3dgw_port->json_key);
5898 if (!distributed && od->l3redirect_port) {
5899 /* Flows for NAT rules that are centralized are only
5900 * programmed on the "redirect-chassis". */
5901 ds_put_format(&match, " && is_chassis_resident(%s)",
5902 od->l3redirect_port->json_key);
5903 }
5904 ds_clear(&actions);
5905 if (distributed) {
5906 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5907 ETH_ADDR_ARGS(mac));
5908 }
5909 ds_put_format(&actions, "ct_dnat;");
5910 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
5911 ds_cstr(&match), ds_cstr(&actions));
5912 }
5913
5914 /* Egress SNAT table: Packets enter the egress pipeline with
5915 * source ip address that needs to be SNATted to an external ip
5916 * address. */
5917 if (!strcmp(nat->type, "snat")
5918 || !strcmp(nat->type, "dnat_and_snat")) {
5919 if (!od->l3dgw_port) {
5920 /* Gateway router. */
5921 ds_clear(&match);
5922 ds_put_format(&match, "ip && ip4.src == %s",
5923 nat->logical_ip);
5924 ds_clear(&actions);
5925 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5926
5927 /* The priority here is calculated such that the
5928 * nat->logical_ip with the longest mask gets a higher
5929 * priority. */
5930 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5931 count_1bits(ntohl(mask)) + 1,
5932 ds_cstr(&match), ds_cstr(&actions));
5933 } else {
5934 /* Distributed router. */
5935 ds_clear(&match);
5936 ds_put_format(&match, "ip && ip4.src == %s"
5937 " && outport == %s",
5938 nat->logical_ip,
5939 od->l3dgw_port->json_key);
5940 if (!distributed && od->l3redirect_port) {
5941 /* Flows for NAT rules that are centralized are only
5942 * programmed on the "redirect-chassis". */
5943 ds_put_format(&match, " && is_chassis_resident(%s)",
5944 od->l3redirect_port->json_key);
5945 }
5946 ds_clear(&actions);
5947 if (distributed) {
5948 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5949 ETH_ADDR_ARGS(mac));
5950 }
5951 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5952
5953 /* The priority here is calculated such that the
5954 * nat->logical_ip with the longest mask gets a higher
5955 * priority. */
5956 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5957 count_1bits(ntohl(mask)) + 1,
5958 ds_cstr(&match), ds_cstr(&actions));
5959 }
5960 }
5961
5962 /* Logical router ingress table 0:
5963 * For NAT on a distributed router, add rules allowing
5964 * ingress traffic with eth.dst matching nat->external_mac
5965 * on the l3dgw_port instance where nat->logical_port is
5966 * resident. */
5967 if (distributed) {
5968 ds_clear(&match);
5969 ds_put_format(&match,
5970 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
5971 " && is_chassis_resident(\"%s\")",
5972 ETH_ADDR_ARGS(mac),
5973 od->l3dgw_port->json_key,
5974 nat->logical_port);
5975 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
5976 ds_cstr(&match), "next;");
5977 }
5978
5979 /* Ingress Gateway Redirect Table: For NAT on a distributed
5980 * router, add flows that are specific to a NAT rule. These
5981 * flows indicate the presence of an applicable NAT rule that
5982 * can be applied in a distributed manner. */
5983 if (distributed) {
5984 ds_clear(&match);
5985 ds_put_format(&match, "ip4.src == %s && outport == %s",
5986 nat->logical_ip,
5987 od->l3dgw_port->json_key);
5988 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
5989 ds_cstr(&match), "next;");
5990 }
5991
5992 /* Egress Loopback table: For NAT on a distributed router.
5993 * If packets in the egress pipeline on the distributed
5994 * gateway port have ip.dst matching a NAT external IP, then
5995 * loop a clone of the packet back to the beginning of the
5996 * ingress pipeline with inport = outport. */
5997 if (od->l3dgw_port) {
5998 /* Distributed router. */
5999 ds_clear(&match);
6000 ds_put_format(&match, "ip4.dst == %s && outport == %s",
6001 nat->external_ip,
6002 od->l3dgw_port->json_key);
6003 ds_clear(&actions);
6004 ds_put_format(&actions,
6005 "clone { ct_clear; "
6006 "inport = outport; outport = \"\"; "
6007 "flags = 0; flags.loopback = 1; ");
6008 for (int j = 0; j < MFF_N_LOG_REGS; j++) {
6009 ds_put_format(&actions, "reg%d = 0; ", j);
6010 }
6011 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
6012 "next(pipeline=ingress, table=0); };");
6013 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
6014 ds_cstr(&match), ds_cstr(&actions));
6015 }
6016 }
6017
6018 /* Handle force SNAT options set in the gateway router. */
6019 if (dnat_force_snat_ip && !od->l3dgw_port) {
6020 /* If a packet with destination IP address as that of the
6021 * gateway router (as set in options:dnat_force_snat_ip) is seen,
6022 * UNSNAT it. */
6023 ds_clear(&match);
6024 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
6025 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
6026 ds_cstr(&match), "ct_snat;");
6027
6028 /* Higher priority rules to force SNAT with the IP addresses
6029 * configured in the Gateway router. This only takes effect
6030 * when the packet has already been DNATed once. */
6031 ds_clear(&match);
6032 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
6033 ds_clear(&actions);
6034 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
6035 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
6036 ds_cstr(&match), ds_cstr(&actions));
6037 }
6038 if (lb_force_snat_ip && !od->l3dgw_port) {
6039 /* If a packet with destination IP address as that of the
6040 * gateway router (as set in options:lb_force_snat_ip) is seen,
6041 * UNSNAT it. */
6042 ds_clear(&match);
6043 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
6044 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
6045 ds_cstr(&match), "ct_snat;");
6046
6047 /* Load balanced traffic will have flags.force_snat_for_lb set.
6048 * Force SNAT it. */
6049 ds_clear(&match);
6050 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
6051 ds_clear(&actions);
6052 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
6053 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
6054 ds_cstr(&match), ds_cstr(&actions));
6055 }
6056
6057 if (!od->l3dgw_port) {
6058 /* For gateway router, re-circulate every packet through
6059 * the DNAT zone. This helps with the following.
6060 *
6061 * Any packet that needs to be unDNATed in the reverse
6062 * direction gets unDNATed. Ideally this could be done in
6063 * the egress pipeline. But since the gateway router
6064 * does not have any feature that depends on the source
6065 * ip address being external IP address for IP routing,
6066 * we can do it here, saving a future re-circulation. */
6067 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
6068 "ip", "flags.loopback = 1; ct_dnat;");
6069 } else {
6070 /* For NAT on a distributed router, add flows to Ingress
6071 * IP Routing table, Ingress ARP Resolution table, and
6072 * Ingress Gateway Redirect Table that are not specific to a
6073 * NAT rule. */
6074
6075 /* The highest priority IN_IP_ROUTING rule matches packets
6076 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
6077 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
6078 * will take care of setting the outport. */
6079 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
6080 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
6081
6082 /* The highest priority IN_ARP_RESOLVE rule matches packets
6083 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
6084 * then sets eth.dst to the distributed gateway port's
6085 * ethernet address. */
6086 ds_clear(&actions);
6087 ds_put_format(&actions, "eth.dst = %s; next;",
6088 od->l3dgw_port->lrp_networks.ea_s);
6089 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
6090 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
6091
6092 /* The highest priority IN_GW_REDIRECT rule redirects packets
6093 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
6094 * the central instance of the l3dgw_port for NAT processing. */
6095 ds_clear(&actions);
6096 ds_put_format(&actions, "outport = %s; next;",
6097 od->l3redirect_port->json_key);
6098 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
6099 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
6100 }
6101
6102 /* Load balancing and packet defrag are only valid on
6103 * Gateway routers or router with gateway port. */
6104 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
6105 continue;
6106 }
6107
6108 /* A set to hold all ips that need defragmentation and tracking. */
6109 struct sset all_ips = SSET_INITIALIZER(&all_ips);
6110
6111 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
6112 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
6113 struct smap *vips = &lb->vips;
6114 struct smap_node *node;
6115
6116 SMAP_FOR_EACH (node, vips) {
6117 uint16_t port = 0;
6118 int addr_family;
6119
6120 /* node->key contains IP:port or just IP. */
6121 char *ip_address = NULL;
6122 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
6123 &addr_family);
6124 if (!ip_address) {
6125 continue;
6126 }
6127
6128 if (!sset_contains(&all_ips, ip_address)) {
6129 sset_add(&all_ips, ip_address);
6130 /* If there are any load balancing rules, we should send
6131 * the packet to conntrack for defragmentation and
6132 * tracking. This helps with two things.
6133 *
6134 * 1. With tracking, we can send only new connections to
6135 * pick a DNAT ip address from a group.
6136 * 2. If there are L4 ports in load balancing rules, we
6137 * need the defragmentation to match on L4 ports. */
6138 ds_clear(&match);
6139 if (addr_family == AF_INET) {
6140 ds_put_format(&match, "ip && ip4.dst == %s",
6141 ip_address);
6142 } else {
6143 ds_put_format(&match, "ip && ip6.dst == %s",
6144 ip_address);
6145 }
6146 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
6147 100, ds_cstr(&match), "ct_next;");
6148 }
6149
6150 /* Higher priority rules are added for load-balancing in DNAT
6151 * table. For every match (on a VIP[:port]), we add two flows
6152 * via add_router_lb_flow(). One flow is for specific matching
6153 * on ct.new with an action of "ct_lb($targets);". The other
6154 * flow is for ct.est with an action of "ct_dnat;". */
6155 ds_clear(&actions);
6156 ds_put_format(&actions, "ct_lb(%s);", node->value);
6157
6158 ds_clear(&match);
6159 if (addr_family == AF_INET) {
6160 ds_put_format(&match, "ip && ip4.dst == %s",
6161 ip_address);
6162 } else {
6163 ds_put_format(&match, "ip && ip6.dst == %s",
6164 ip_address);
6165 }
6166 free(ip_address);
6167
6168 int prio = 110;
6169 bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ?
6170 true : false;
6171 if (port) {
6172 if (is_udp) {
6173 ds_put_format(&match, " && udp && udp.dst == %d",
6174 port);
6175 } else {
6176 ds_put_format(&match, " && tcp && tcp.dst == %d",
6177 port);
6178 }
6179 prio = 120;
6180 }
6181
6182 if (od->l3redirect_port) {
6183 ds_put_format(&match, " && is_chassis_resident(%s)",
6184 od->l3redirect_port->json_key);
6185 }
6186 add_router_lb_flow(lflows, od, &match, &actions, prio,
6187 lb_force_snat_ip, node->value, is_udp,
6188 addr_family);
6189 }
6190 }
6191 sset_destroy(&all_ips);
6192 }
6193
6194 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
6195 * response. */
6196 HMAP_FOR_EACH (op, key_node, ports) {
6197 if (!op->nbrp || op->nbrp->peer || !op->peer) {
6198 continue;
6199 }
6200
6201 if (!op->lrp_networks.n_ipv6_addrs) {
6202 continue;
6203 }
6204
6205 const char *address_mode = smap_get(
6206 &op->nbrp->ipv6_ra_configs, "address_mode");
6207
6208 if (!address_mode) {
6209 continue;
6210 }
6211 if (strcmp(address_mode, "slaac") &&
6212 strcmp(address_mode, "dhcpv6_stateful") &&
6213 strcmp(address_mode, "dhcpv6_stateless")) {
6214 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
6215 VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
6216 address_mode);
6217 continue;
6218 }
6219
6220 if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
6221 false)) {
6222 copy_ra_to_sb(op, address_mode);
6223 }
6224
6225 ds_clear(&match);
6226 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
6227 op->json_key);
6228 ds_clear(&actions);
6229
6230 const char *mtu_s = smap_get(
6231 &op->nbrp->ipv6_ra_configs, "mtu");
6232
6233 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
6234 uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
6235
6236 ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
6237 "addr_mode = \"%s\", slla = %s",
6238 address_mode, op->lrp_networks.ea_s);
6239 if (mtu > 0) {
6240 ds_put_format(&actions, ", mtu = %u", mtu);
6241 }
6242
6243 bool add_rs_response_flow = false;
6244
6245 for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6246 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
6247 continue;
6248 }
6249
6250 /* Add the prefix option if the address mode is slaac or
6251 * dhcpv6_stateless. */
6252 if (strcmp(address_mode, "dhcpv6_stateful")) {
6253 ds_put_format(&actions, ", prefix = %s/%u",
6254 op->lrp_networks.ipv6_addrs[i].network_s,
6255 op->lrp_networks.ipv6_addrs[i].plen);
6256 }
6257 add_rs_response_flow = true;
6258 }
6259
6260 if (add_rs_response_flow) {
6261 ds_put_cstr(&actions, "); next;");
6262 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50,
6263 ds_cstr(&match), ds_cstr(&actions));
6264 ds_clear(&actions);
6265 ds_clear(&match);
6266 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && "
6267 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
6268
6269 char ip6_str[INET6_ADDRSTRLEN + 1];
6270 struct in6_addr lla;
6271 in6_generate_lla(op->lrp_networks.ea, &lla);
6272 memset(ip6_str, 0, sizeof(ip6_str));
6273 ipv6_string_mapped(ip6_str, &lla);
6274 ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; "
6275 "ip6.dst = ip6.src; ip6.src = %s; "
6276 "outport = inport; flags.loopback = 1; "
6277 "output;",
6278 op->lrp_networks.ea_s, ip6_str);
6279 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50,
6280 ds_cstr(&match), ds_cstr(&actions));
6281 }
6282 }
6283
6284 /* Logical router ingress table 5, 6: RS responder, by default goto next.
6285 * (priority 0)*/
6286 HMAP_FOR_EACH (od, key_node, datapaths) {
6287 if (!od->nbr) {
6288 continue;
6289 }
6290
6291 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
6292 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
6293 }
6294
6295 /* Logical router ingress table 7: IP Routing.
6296 *
6297 * A packet that arrives at this table is an IP packet that should be
6298 * routed to the address in 'ip[46].dst'. This table sets outport to
6299 * the correct output port, eth.src to the output port's MAC
6300 * address, and '[xx]reg0' to the next-hop IP address (leaving
6301 * 'ip[46].dst', the packet's final destination, unchanged), and
6302 * advances to the next table for ARP/ND resolution. */
6303 HMAP_FOR_EACH (op, key_node, ports) {
6304 if (!op->nbrp) {
6305 continue;
6306 }
6307
6308 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
6309 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
6310 op->lrp_networks.ipv4_addrs[i].network_s,
6311 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
6312 }
6313
6314 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6315 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
6316 op->lrp_networks.ipv6_addrs[i].network_s,
6317 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6318 }
6319 }
6320
6321 /* Convert the static routes to flows. */
6322 HMAP_FOR_EACH (od, key_node, datapaths) {
6323 if (!od->nbr) {
6324 continue;
6325 }
6326
6327 for (int i = 0; i < od->nbr->n_static_routes; i++) {
6328 const struct nbrec_logical_router_static_route *route;
6329
6330 route = od->nbr->static_routes[i];
6331 build_static_route_flow(lflows, od, ports, route);
6332 }
6333 }
6334
6335 /* XXX destination unreachable */
6336
6337 /* Logical router ingress table 8: ARP Resolution.
6338 *
6339 * Any packet that reaches this table is an IP packet whose next-hop IP
6340 * address is in reg0. (ip4.dst is the final destination.) This table
6341 * resolves the IP address in reg0 into an output port in outport and an
6342 * Ethernet address in eth.dst. */
6343 HMAP_FOR_EACH (op, key_node, ports) {
6344 if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
6345 continue;
6346 }
6347
6348 if (op->nbrp) {
6349 /* This is a logical router port. If next-hop IP address in
6350 * '[xx]reg0' matches IP address of this router port, then
6351 * the packet is intended to eventually be sent to this
6352 * logical port. Set the destination mac address using this
6353 * port's mac address.
6354 *
6355 * The packet is still in peer's logical pipeline. So the match
6356 * should be on peer's outport. */
6357 if (op->peer && op->nbrp->peer) {
6358 if (op->lrp_networks.n_ipv4_addrs) {
6359 ds_clear(&match);
6360 ds_put_format(&match, "outport == %s && reg0 == ",
6361 op->peer->json_key);
6362 op_put_v4_networks(&match, op, false);
6363
6364 ds_clear(&actions);
6365 ds_put_format(&actions, "eth.dst = %s; next;",
6366 op->lrp_networks.ea_s);
6367 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6368 100, ds_cstr(&match), ds_cstr(&actions));
6369 }
6370
6371 if (op->lrp_networks.n_ipv6_addrs) {
6372 ds_clear(&match);
6373 ds_put_format(&match, "outport == %s && xxreg0 == ",
6374 op->peer->json_key);
6375 op_put_v6_networks(&match, op);
6376
6377 ds_clear(&actions);
6378 ds_put_format(&actions, "eth.dst = %s; next;",
6379 op->lrp_networks.ea_s);
6380 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6381 100, ds_cstr(&match), ds_cstr(&actions));
6382 }
6383 }
6384 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
6385 /* This is a logical switch port that backs a VM or a container.
6386 * Extract its addresses. For each of the address, go through all
6387 * the router ports attached to the switch (to which this port
6388 * connects) and if the address in question is reachable from the
6389 * router port, add an ARP/ND entry in that router's pipeline. */
6390
6391 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
6392 const char *ea_s = op->lsp_addrs[i].ea_s;
6393 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
6394 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
6395 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6396 /* Get the Logical_Router_Port that the
6397 * Logical_Switch_Port is connected to, as
6398 * 'peer'. */
6399 const char *peer_name = smap_get(
6400 &op->od->router_ports[k]->nbsp->options,
6401 "router-port");
6402 if (!peer_name) {
6403 continue;
6404 }
6405
6406 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6407 if (!peer || !peer->nbrp) {
6408 continue;
6409 }
6410
6411 if (!find_lrp_member_ip(peer, ip_s)) {
6412 continue;
6413 }
6414
6415 ds_clear(&match);
6416 ds_put_format(&match, "outport == %s && reg0 == %s",
6417 peer->json_key, ip_s);
6418
6419 ds_clear(&actions);
6420 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6421 ovn_lflow_add(lflows, peer->od,
6422 S_ROUTER_IN_ARP_RESOLVE, 100,
6423 ds_cstr(&match), ds_cstr(&actions));
6424 }
6425 }
6426
6427 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
6428 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
6429 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6430 /* Get the Logical_Router_Port that the
6431 * Logical_Switch_Port is connected to, as
6432 * 'peer'. */
6433 const char *peer_name = smap_get(
6434 &op->od->router_ports[k]->nbsp->options,
6435 "router-port");
6436 if (!peer_name) {
6437 continue;
6438 }
6439
6440 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6441 if (!peer || !peer->nbrp) {
6442 continue;
6443 }
6444
6445 if (!find_lrp_member_ip(peer, ip_s)) {
6446 continue;
6447 }
6448
6449 ds_clear(&match);
6450 ds_put_format(&match, "outport == %s && xxreg0 == %s",
6451 peer->json_key, ip_s);
6452
6453 ds_clear(&actions);
6454 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6455 ovn_lflow_add(lflows, peer->od,
6456 S_ROUTER_IN_ARP_RESOLVE, 100,
6457 ds_cstr(&match), ds_cstr(&actions));
6458 }
6459 }
6460 }
6461 } else if (!strcmp(op->nbsp->type, "router")) {
6462 /* This is a logical switch port that connects to a router. */
6463
6464 /* The peer of this switch port is the router port for which
6465 * we need to add logical flows such that it can resolve
6466 * ARP entries for all the other router ports connected to
6467 * the switch in question. */
6468
6469 const char *peer_name = smap_get(&op->nbsp->options,
6470 "router-port");
6471 if (!peer_name) {
6472 continue;
6473 }
6474
6475 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6476 if (!peer || !peer->nbrp) {
6477 continue;
6478 }
6479
6480 for (size_t i = 0; i < op->od->n_router_ports; i++) {
6481 const char *router_port_name = smap_get(
6482 &op->od->router_ports[i]->nbsp->options,
6483 "router-port");
6484 struct ovn_port *router_port = ovn_port_find(ports,
6485 router_port_name);
6486 if (!router_port || !router_port->nbrp) {
6487 continue;
6488 }
6489
6490 /* Skip the router port under consideration. */
6491 if (router_port == peer) {
6492 continue;
6493 }
6494
6495 if (router_port->lrp_networks.n_ipv4_addrs) {
6496 ds_clear(&match);
6497 ds_put_format(&match, "outport == %s && reg0 == ",
6498 peer->json_key);
6499 op_put_v4_networks(&match, router_port, false);
6500
6501 ds_clear(&actions);
6502 ds_put_format(&actions, "eth.dst = %s; next;",
6503 router_port->lrp_networks.ea_s);
6504 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6505 100, ds_cstr(&match), ds_cstr(&actions));
6506 }
6507
6508 if (router_port->lrp_networks.n_ipv6_addrs) {
6509 ds_clear(&match);
6510 ds_put_format(&match, "outport == %s && xxreg0 == ",
6511 peer->json_key);
6512 op_put_v6_networks(&match, router_port);
6513
6514 ds_clear(&actions);
6515 ds_put_format(&actions, "eth.dst = %s; next;",
6516 router_port->lrp_networks.ea_s);
6517 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6518 100, ds_cstr(&match), ds_cstr(&actions));
6519 }
6520 }
6521 }
6522 }
6523
6524 HMAP_FOR_EACH (od, key_node, datapaths) {
6525 if (!od->nbr) {
6526 continue;
6527 }
6528
6529 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
6530 "get_arp(outport, reg0); next;");
6531
6532 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
6533 "get_nd(outport, xxreg0); next;");
6534 }
6535
6536 /* Logical router ingress table 9: Gateway redirect.
6537 *
6538 * For traffic with outport equal to the l3dgw_port
6539 * on a distributed router, this table redirects a subset
6540 * of the traffic to the l3redirect_port which represents
6541 * the central instance of the l3dgw_port.
6542 */
6543 HMAP_FOR_EACH (od, key_node, datapaths) {
6544 if (!od->nbr) {
6545 continue;
6546 }
6547 if (od->l3dgw_port && od->l3redirect_port) {
6548 /* For traffic with outport == l3dgw_port, if the
6549 * packet did not match any higher priority redirect
6550 * rule, then the traffic is redirected to the central
6551 * instance of the l3dgw_port. */
6552 ds_clear(&match);
6553 ds_put_format(&match, "outport == %s",
6554 od->l3dgw_port->json_key);
6555 ds_clear(&actions);
6556 ds_put_format(&actions, "outport = %s; next;",
6557 od->l3redirect_port->json_key);
6558 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
6559 ds_cstr(&match), ds_cstr(&actions));
6560
6561 /* If the Ethernet destination has not been resolved,
6562 * redirect to the central instance of the l3dgw_port.
6563 * Such traffic will be replaced by an ARP request or ND
6564 * Neighbor Solicitation in the ARP request ingress
6565 * table, before being redirected to the central instance.
6566 */
6567 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
6568 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
6569 ds_cstr(&match), ds_cstr(&actions));
6570 }
6571
6572 /* Packets are allowed by default. */
6573 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
6574 }
6575
6576 /* Logical router ingress table 10: ARP request.
6577 *
6578 * In the common case where the Ethernet destination has been resolved,
6579 * this table outputs the packet (priority 0). Otherwise, it composes
6580 * and sends an ARP/IPv6 NS request (priority 100). */
6581 HMAP_FOR_EACH (od, key_node, datapaths) {
6582 if (!od->nbr) {
6583 continue;
6584 }
6585
6586 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6587 "eth.dst == 00:00:00:00:00:00",
6588 "arp { "
6589 "eth.dst = ff:ff:ff:ff:ff:ff; "
6590 "arp.spa = reg1; "
6591 "arp.tpa = reg0; "
6592 "arp.op = 1; " /* ARP request */
6593 "output; "
6594 "};");
6595 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6596 "eth.dst == 00:00:00:00:00:00",
6597 "nd_ns { "
6598 "nd.target = xxreg0; "
6599 "output; "
6600 "};");
6601 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
6602 }
6603
6604 /* Logical router egress table 1: Delivery (priority 100).
6605 *
6606 * Priority 100 rules deliver packets to enabled logical ports. */
6607 HMAP_FOR_EACH (op, key_node, ports) {
6608 if (!op->nbrp) {
6609 continue;
6610 }
6611
6612 if (!lrport_is_enabled(op->nbrp)) {
6613 /* Drop packets to disabled logical ports (since logical flow
6614 * tables are default-drop). */
6615 continue;
6616 }
6617
6618 if (op->derived) {
6619 /* No egress packets should be processed in the context of
6620 * a chassisredirect port. The chassisredirect port should
6621 * be replaced by the l3dgw port in the local output
6622 * pipeline stage before egress processing. */
6623 continue;
6624 }
6625
6626 ds_clear(&match);
6627 ds_put_format(&match, "outport == %s", op->json_key);
6628 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
6629 ds_cstr(&match), "output;");
6630 }
6631
6632 ds_destroy(&match);
6633 ds_destroy(&actions);
6634 }
6635
6636 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
6637 * constructing their contents based on the OVN_NB database. */
6638 static void
6639 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
6640 struct hmap *ports, struct hmap *port_groups)
6641 {
6642 struct hmap lflows = HMAP_INITIALIZER(&lflows);
6643 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
6644
6645 build_lswitch_flows(datapaths, ports, port_groups, &lflows, &mcgroups);
6646 build_lrouter_flows(datapaths, ports, &lflows);
6647
6648 /* Push changes to the Logical_Flow table to database. */
6649 const struct sbrec_logical_flow *sbflow, *next_sbflow;
6650 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
6651 struct ovn_datapath *od
6652 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
6653 if (!od) {
6654 sbrec_logical_flow_delete(sbflow);
6655 continue;
6656 }
6657
6658 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
6659 enum ovn_pipeline pipeline
6660 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
6661 struct ovn_lflow *lflow = ovn_lflow_find(
6662 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
6663 sbflow->priority, sbflow->match, sbflow->actions, sbflow->hash);
6664 if (lflow) {
6665 ovn_lflow_destroy(&lflows, lflow);
6666 } else {
6667 sbrec_logical_flow_delete(sbflow);
6668 }
6669 }
6670 struct ovn_lflow *lflow, *next_lflow;
6671 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
6672 const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
6673 uint8_t table = ovn_stage_get_table(lflow->stage);
6674
6675 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
6676 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
6677 sbrec_logical_flow_set_pipeline(sbflow, pipeline);
6678 sbrec_logical_flow_set_table_id(sbflow, table);
6679 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
6680 sbrec_logical_flow_set_match(sbflow, lflow->match);
6681 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
6682
6683 /* Trim the source locator lflow->where, which looks something like
6684 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
6685 * last slash, e.g. "ovn-northd.c:1234". */
6686 const char *slash = strrchr(lflow->where, '/');
6687 #if _WIN32
6688 const char *backslash = strrchr(lflow->where, '\\');
6689 if (!slash || backslash > slash) {
6690 slash = backslash;
6691 }
6692 #endif
6693 const char *where = slash ? slash + 1 : lflow->where;
6694
6695 struct smap ids = SMAP_INITIALIZER(&ids);
6696 smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
6697 smap_add(&ids, "source", where);
6698 if (lflow->stage_hint) {
6699 smap_add(&ids, "stage-hint", lflow->stage_hint);
6700 }
6701 sbrec_logical_flow_set_external_ids(sbflow, &ids);
6702 smap_destroy(&ids);
6703
6704 ovn_lflow_destroy(&lflows, lflow);
6705 }
6706 hmap_destroy(&lflows);
6707
6708 /* Push changes to the Multicast_Group table to database. */
6709 const struct sbrec_multicast_group *sbmc, *next_sbmc;
6710 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
6711 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
6712 sbmc->datapath);
6713 if (!od) {
6714 sbrec_multicast_group_delete(sbmc);
6715 continue;
6716 }
6717
6718 struct multicast_group group = { .name = sbmc->name,
6719 .key = sbmc->tunnel_key };
6720 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
6721 if (mc) {
6722 ovn_multicast_update_sbrec(mc, sbmc);
6723 ovn_multicast_destroy(&mcgroups, mc);
6724 } else {
6725 sbrec_multicast_group_delete(sbmc);
6726 }
6727 }
6728 struct ovn_multicast *mc, *next_mc;
6729 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
6730 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
6731 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
6732 sbrec_multicast_group_set_name(sbmc, mc->group->name);
6733 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
6734 ovn_multicast_update_sbrec(mc, sbmc);
6735 ovn_multicast_destroy(&mcgroups, mc);
6736 }
6737 hmap_destroy(&mcgroups);
6738 }
6739
6740 static void
6741 sync_address_set(struct northd_context *ctx, const char *name,
6742 const char **addrs, size_t n_addrs,
6743 struct shash *sb_address_sets)
6744 {
6745 const struct sbrec_address_set *sb_address_set;
6746 sb_address_set = shash_find_and_delete(sb_address_sets,
6747 name);
6748 if (!sb_address_set) {
6749 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
6750 sbrec_address_set_set_name(sb_address_set, name);
6751 }
6752
6753 sbrec_address_set_set_addresses(sb_address_set,
6754 addrs, n_addrs);
6755 }
6756
6757 /* Go through 'addresses' and add found IPv4 addresses to 'ipv4_addrs' and IPv6
6758 * addresses to 'ipv6_addrs'.
6759 */
6760 static void
6761 split_addresses(const char *addresses, struct svec *ipv4_addrs,
6762 struct svec *ipv6_addrs)
6763 {
6764 struct lport_addresses laddrs;
6765 extract_lsp_addresses(addresses, &laddrs);
6766 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
6767 svec_add(ipv4_addrs, laddrs.ipv4_addrs[k].addr_s);
6768 }
6769 for (size_t k = 0; k < laddrs.n_ipv6_addrs; k++) {
6770 svec_add(ipv6_addrs, laddrs.ipv6_addrs[k].addr_s);
6771 }
6772 destroy_lport_addresses(&laddrs);
6773 }
6774
6775 /* OVN_Southbound Address_Set table contains same records as in north
6776 * bound, plus the records generated from Port_Group table in north bound.
6777 *
6778 * There are 2 records generated from each port group, one for IPv4, and
6779 * one for IPv6, named in the format: <port group name>_ip4 and
6780 * <port group name>_ip6 respectively. MAC addresses are ignored.
6781 *
6782 * We always update OVN_Southbound to match the Address_Set and Port_Group
6783 * in OVN_Northbound, so that the address sets used in Logical_Flows in
6784 * OVN_Southbound is checked against the proper set.*/
6785 static void
6786 sync_address_sets(struct northd_context *ctx)
6787 {
6788 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
6789
6790 const struct sbrec_address_set *sb_address_set;
6791 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
6792 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
6793 }
6794
6795 /* sync port group generated address sets first */
6796 const struct nbrec_port_group *nb_port_group;
6797 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6798 struct svec ipv4_addrs = SVEC_EMPTY_INITIALIZER;
6799 struct svec ipv6_addrs = SVEC_EMPTY_INITIALIZER;
6800 for (size_t i = 0; i < nb_port_group->n_ports; i++) {
6801 for (size_t j = 0; j < nb_port_group->ports[i]->n_addresses; j++) {
6802 const char *addrs = nb_port_group->ports[i]->addresses[j];
6803 if (!is_dynamic_lsp_address(addrs)) {
6804 split_addresses(addrs, &ipv4_addrs, &ipv6_addrs);
6805 }
6806 }
6807 if (nb_port_group->ports[i]->dynamic_addresses) {
6808 split_addresses(nb_port_group->ports[i]->dynamic_addresses,
6809 &ipv4_addrs, &ipv6_addrs);
6810 }
6811 }
6812 char *ipv4_addrs_name = xasprintf("%s_ip4", nb_port_group->name);
6813 char *ipv6_addrs_name = xasprintf("%s_ip6", nb_port_group->name);
6814 sync_address_set(ctx, ipv4_addrs_name,
6815 /* "char **" is not compatible with "const char **" */
6816 (const char **)ipv4_addrs.names,
6817 ipv4_addrs.n, &sb_address_sets);
6818 sync_address_set(ctx, ipv6_addrs_name,
6819 /* "char **" is not compatible with "const char **" */
6820 (const char **)ipv6_addrs.names,
6821 ipv6_addrs.n, &sb_address_sets);
6822 free(ipv4_addrs_name);
6823 free(ipv6_addrs_name);
6824 svec_destroy(&ipv4_addrs);
6825 svec_destroy(&ipv6_addrs);
6826 }
6827
6828 /* sync user defined address sets, which may overwrite port group
6829 * generated address sets if same name is used */
6830 const struct nbrec_address_set *nb_address_set;
6831 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
6832 sync_address_set(ctx, nb_address_set->name,
6833 /* "char **" is not compatible with "const char **" */
6834 (const char **)nb_address_set->addresses,
6835 nb_address_set->n_addresses, &sb_address_sets);
6836 }
6837
6838 struct shash_node *node, *next;
6839 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
6840 sbrec_address_set_delete(node->data);
6841 shash_delete(&sb_address_sets, node);
6842 }
6843 shash_destroy(&sb_address_sets);
6844 }
6845
6846 /* Each port group in Port_Group table in OVN_Northbound has a corresponding
6847 * entry in Port_Group table in OVN_Southbound. In OVN_Northbound the entries
6848 * contains lport uuids, while in OVN_Southbound we store the lport names.
6849 */
6850 static void
6851 sync_port_groups(struct northd_context *ctx)
6852 {
6853 struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups);
6854
6855 const struct sbrec_port_group *sb_port_group;
6856 SBREC_PORT_GROUP_FOR_EACH (sb_port_group, ctx->ovnsb_idl) {
6857 shash_add(&sb_port_groups, sb_port_group->name, sb_port_group);
6858 }
6859
6860 const struct nbrec_port_group *nb_port_group;
6861 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6862 sb_port_group = shash_find_and_delete(&sb_port_groups,
6863 nb_port_group->name);
6864 if (!sb_port_group) {
6865 sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn);
6866 sbrec_port_group_set_name(sb_port_group, nb_port_group->name);
6867 }
6868
6869 const char **nb_port_names = xcalloc(nb_port_group->n_ports,
6870 sizeof *nb_port_names);
6871 int i;
6872 for (i = 0; i < nb_port_group->n_ports; i++) {
6873 nb_port_names[i] = nb_port_group->ports[i]->name;
6874 }
6875 sbrec_port_group_set_ports(sb_port_group,
6876 nb_port_names,
6877 nb_port_group->n_ports);
6878 free(nb_port_names);
6879 }
6880
6881 struct shash_node *node, *next;
6882 SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) {
6883 sbrec_port_group_delete(node->data);
6884 shash_delete(&sb_port_groups, node);
6885 }
6886 shash_destroy(&sb_port_groups);
6887 }
6888
/* Flattened copy of one meter band, used to sort and compare the bands of a
 * northbound meter against those of a southbound meter.  'action' is a
 * borrowed pointer; band_entry never owns the string. */
struct band_entry {
    int64_t rate;
    int64_t burst_size;
    const char *action;
};

/* qsort() comparator for 'struct band_entry'.
 *
 * Orders by 'rate' descending, then by 'burst_size' descending, and finally
 * by 'action' in strcmp() order.  Returns -1 or 1 when the numeric fields
 * decide, otherwise the strcmp() result. */
static int
band_cmp(const void *band1_, const void *band2_)
{
    const struct band_entry *lhs = band1_;
    const struct band_entry *rhs = band2_;

    if (lhs->rate > rhs->rate) {
        return -1;
    }
    if (lhs->rate < rhs->rate) {
        return 1;
    }

    if (lhs->burst_size > rhs->burst_size) {
        return -1;
    }
    if (lhs->burst_size < rhs->burst_size) {
        return 1;
    }

    return strcmp(lhs->action, rhs->action);
}
6909
6910 static bool
6911 bands_need_update(const struct nbrec_meter *nb_meter,
6912 const struct sbrec_meter *sb_meter)
6913 {
6914 if (nb_meter->n_bands != sb_meter->n_bands) {
6915 return true;
6916 }
6917
6918 /* A single band is the most common scenario, so speed up that
6919 * check. */
6920 if (nb_meter->n_bands == 1) {
6921 struct nbrec_meter_band *nb_band = nb_meter->bands[0];
6922 struct sbrec_meter_band *sb_band = sb_meter->bands[0];
6923
6924 return !(nb_band->rate == sb_band->rate
6925 && nb_band->burst_size == sb_band->burst_size
6926 && !strcmp(sb_band->action, nb_band->action));
6927 }
6928
6929 /* Place the Northbound entries in sorted order. */
6930 struct band_entry *nb_bands;
6931 nb_bands = xmalloc(sizeof *nb_bands * nb_meter->n_bands);
6932 for (size_t i = 0; i < nb_meter->n_bands; i++) {
6933 struct nbrec_meter_band *nb_band = nb_meter->bands[i];
6934
6935 nb_bands[i].rate = nb_band->rate;
6936 nb_bands[i].burst_size = nb_band->burst_size;
6937 nb_bands[i].action = nb_band->action;
6938 }
6939 qsort(nb_bands, nb_meter->n_bands, sizeof *nb_bands, band_cmp);
6940
6941 /* Place the Southbound entries in sorted order. */
6942 struct band_entry *sb_bands;
6943 sb_bands = xmalloc(sizeof *sb_bands * sb_meter->n_bands);
6944 for (size_t i = 0; i < sb_meter->n_bands; i++) {
6945 struct sbrec_meter_band *sb_band = sb_meter->bands[i];
6946
6947 sb_bands[i].rate = sb_band->rate;
6948 sb_bands[i].burst_size = sb_band->burst_size;
6949 sb_bands[i].action = sb_band->action;
6950 }
6951 qsort(sb_bands, sb_meter->n_bands, sizeof *sb_bands, band_cmp);
6952
6953 bool need_update = false;
6954 for (size_t i = 0; i < nb_meter->n_bands; i++) {
6955 if (nb_bands[i].rate != sb_bands[i].rate
6956 || nb_bands[i].burst_size != sb_bands[i].burst_size
6957 || strcmp(nb_bands[i].action, nb_bands[i].action)) {
6958 need_update = true;
6959 goto done;
6960 }
6961 }
6962
6963 done:
6964 free(nb_bands);
6965 free(sb_bands);
6966
6967 return need_update;
6968 }
6969
6970 /* Each entry in the Meter and Meter_Band tables in OVN_Northbound have
6971 * a corresponding entries in the Meter and Meter_Band tables in
6972 * OVN_Southbound.
6973 */
6974 static void
6975 sync_meters(struct northd_context *ctx)
6976 {
6977 struct shash sb_meters = SHASH_INITIALIZER(&sb_meters);
6978
6979 const struct sbrec_meter *sb_meter;
6980 SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) {
6981 shash_add(&sb_meters, sb_meter->name, sb_meter);
6982 }
6983
6984 const struct nbrec_meter *nb_meter;
6985 NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) {
6986 bool new_sb_meter = false;
6987
6988 sb_meter = shash_find_and_delete(&sb_meters, nb_meter->name);
6989 if (!sb_meter) {
6990 sb_meter = sbrec_meter_insert(ctx->ovnsb_txn);
6991 sbrec_meter_set_name(sb_meter, nb_meter->name);
6992 new_sb_meter = true;
6993 }
6994
6995 if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) {
6996 struct sbrec_meter_band **sb_bands;
6997 sb_bands = xcalloc(nb_meter->n_bands, sizeof *sb_bands);
6998 for (size_t i = 0; i < nb_meter->n_bands; i++) {
6999 const struct nbrec_meter_band *nb_band = nb_meter->bands[i];
7000
7001 sb_bands[i] = sbrec_meter_band_insert(ctx->ovnsb_txn);
7002
7003 sbrec_meter_band_set_action(sb_bands[i], nb_band->action);
7004 sbrec_meter_band_set_rate(sb_bands[i], nb_band->rate);
7005 sbrec_meter_band_set_burst_size(sb_bands[i],
7006 nb_band->burst_size);
7007 }
7008 sbrec_meter_set_bands(sb_meter, sb_bands, nb_meter->n_bands);
7009 free(sb_bands);
7010 }
7011
7012 sbrec_meter_set_unit(sb_meter, nb_meter->unit);
7013 }
7014
7015 struct shash_node *node, *next;
7016 SHASH_FOR_EACH_SAFE (node, next, &sb_meters) {
7017 sbrec_meter_delete(node->data);
7018 shash_delete(&sb_meters, node);
7019 }
7020 shash_destroy(&sb_meters);
7021 }
7022
/*
 * struct 'dns_info' is used to sync the DNS records between the OVN
 * Northbound db and the Southbound db.  One instance exists per northbound
 * DNS row while sync_dns_entries() runs; instances are keyed in an hmap by
 * the hash of the northbound row's UUID.
 */
struct dns_info {
    struct hmap_node hmap_node;
    const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns; /* DNS record in the Southbound db, or
                                     * NULL if none has been found yet. */

    /* Datapaths with which the DNS entry is associated.  'sbs' is a
     * heap-allocated array of 'n_sbs' elements. */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;
};
7036
7037 static inline struct dns_info *
7038 get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
7039 {
7040 struct dns_info *dns_info;
7041 size_t hash = uuid_hash(uuid);
7042 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
7043 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
7044 return dns_info;
7045 }
7046 }
7047
7048 return NULL;
7049 }
7050
7051 static void
7052 sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
7053 {
7054 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
7055 struct ovn_datapath *od;
7056 HMAP_FOR_EACH (od, key_node, datapaths) {
7057 if (!od->nbs || !od->nbs->n_dns_records) {
7058 continue;
7059 }
7060
7061 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
7062 struct dns_info *dns_info = get_dns_info_from_hmap(
7063 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
7064 if (!dns_info) {
7065 size_t hash = uuid_hash(
7066 &od->nbs->dns_records[i]->header_.uuid);
7067 dns_info = xzalloc(sizeof *dns_info);;
7068 dns_info->nb_dns = od->nbs->dns_records[i];
7069 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
7070 }
7071
7072 dns_info->n_sbs++;
7073 dns_info->sbs = xrealloc(dns_info->sbs,
7074 dns_info->n_sbs * sizeof *dns_info->sbs);
7075 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
7076 }
7077 }
7078
7079 const struct sbrec_dns *sbrec_dns, *next;
7080 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
7081 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
7082 struct uuid dns_uuid;
7083 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
7084 sbrec_dns_delete(sbrec_dns);
7085 continue;
7086 }
7087
7088 struct dns_info *dns_info =
7089 get_dns_info_from_hmap(&dns_map, &dns_uuid);
7090 if (dns_info) {
7091 dns_info->sb_dns = sbrec_dns;
7092 } else {
7093 sbrec_dns_delete(sbrec_dns);
7094 }
7095 }
7096
7097 struct dns_info *dns_info;
7098 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
7099 if (!dns_info->sb_dns) {
7100 sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
7101 dns_info->sb_dns = sbrec_dns;
7102 char *dns_id = xasprintf(
7103 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
7104 const struct smap external_ids =
7105 SMAP_CONST1(&external_ids, "dns_id", dns_id);
7106 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
7107 free(dns_id);
7108 }
7109
7110 /* Set the datapaths and records. If nothing has changed, then
7111 * this will be a no-op.
7112 */
7113 sbrec_dns_set_datapaths(
7114 dns_info->sb_dns,
7115 (struct sbrec_datapath_binding **)dns_info->sbs,
7116 dns_info->n_sbs);
7117 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
7118 free(dns_info->sbs);
7119 free(dns_info);
7120 }
7121 hmap_destroy(&dns_map);
7122 }
7123
7124
7125 \f
7126 static void
7127 ovnnb_db_run(struct northd_context *ctx,
7128 struct ovsdb_idl_index *sbrec_chassis_by_name,
7129 struct ovsdb_idl_loop *sb_loop)
7130 {
7131 if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
7132 return;
7133 }
7134 struct hmap datapaths, ports, port_groups;
7135 build_datapaths(ctx, &datapaths);
7136 build_ports(ctx, sbrec_chassis_by_name, &datapaths, &ports);
7137 build_ipam(&datapaths, &ports);
7138 build_port_group_lswitches(ctx, &port_groups, &ports);
7139 build_lflows(ctx, &datapaths, &ports, &port_groups);
7140
7141 sync_address_sets(ctx);
7142 sync_port_groups(ctx);
7143 sync_meters(ctx);
7144 sync_dns_entries(ctx, &datapaths);
7145
7146 struct ovn_port_group *pg, *next_pg;
7147 HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
7148 ovn_port_group_destroy(&port_groups, pg);
7149 }
7150 hmap_destroy(&port_groups);
7151
7152 struct ovn_datapath *dp, *next_dp;
7153 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
7154 ovn_datapath_destroy(&datapaths, dp);
7155 }
7156 hmap_destroy(&datapaths);
7157
7158 struct ovn_port *port, *next_port;
7159 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
7160 ovn_port_destroy(&ports, port);
7161 }
7162 hmap_destroy(&ports);
7163
7164 /* Copy nb_cfg from northbound to southbound database.
7165 *
7166 * Also set up to update sb_cfg once our southbound transaction commits. */
7167 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
7168 if (!nb) {
7169 nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
7170 }
7171 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
7172 if (!sb) {
7173 sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
7174 }
7175 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
7176 sbrec_sb_global_set_options(sb, &nb->options);
7177 sb_loop->next_cfg = nb->nb_cfg;
7178
7179 const char *mac_addr_prefix = smap_get(&nb->options, "mac_prefix");
7180 if (mac_addr_prefix) {
7181 struct eth_addr addr;
7182
7183 memset(&addr, 0, sizeof addr);
7184 if (ovs_scan(mac_addr_prefix, "%"SCNx8":%"SCNx8":%"SCNx8,
7185 &addr.ea[0], &addr.ea[1], &addr.ea[2])) {
7186 mac_prefix = addr;
7187 }
7188 }
7189
7190 cleanup_macam(&macam);
7191 }
7192
7193 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
7194 * this column is not empty, it means we need to set the corresponding logical
7195 * port as 'up' in the northbound DB. */
7196 static void
7197 update_logical_port_status(struct northd_context *ctx)
7198 {
7199 struct hmap lports_hmap;
7200 const struct sbrec_port_binding *sb;
7201 const struct nbrec_logical_switch_port *nbsp;
7202
7203 struct lport_hash_node {
7204 struct hmap_node node;
7205 const struct nbrec_logical_switch_port *nbsp;
7206 } *hash_node;
7207
7208 hmap_init(&lports_hmap);
7209
7210 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
7211 hash_node = xzalloc(sizeof *hash_node);
7212 hash_node->nbsp = nbsp;
7213 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
7214 }
7215
7216 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
7217 nbsp = NULL;
7218 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
7219 hash_string(sb->logical_port, 0),
7220 &lports_hmap) {
7221 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
7222 nbsp = hash_node->nbsp;
7223 break;
7224 }
7225 }
7226
7227 if (!nbsp) {
7228 /* The logical port doesn't exist for this port binding. This can
7229 * happen under normal circumstances when ovn-northd hasn't gotten
7230 * around to pruning the Port_Binding yet. */
7231 continue;
7232 }
7233
7234 bool up = (sb->chassis || !strcmp(nbsp->type, "router"));
7235 if (!nbsp->up || *nbsp->up != up) {
7236 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
7237 }
7238 }
7239
7240 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
7241 free(hash_node);
7242 }
7243 hmap_destroy(&lports_hmap);
7244 }
7245
/* DHCPv4 options that are published in the southbound DHCP_Options table.
 *
 * check_and_add_supported_dhcp_opts_to_sb_db() links the 'hmap_node' member
 * of each element into a temporary hmap, so the array cannot be const. */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2,
    DHCP_OPT_WPAD,
};
7273
/* DHCPv6 options that are published in the southbound DHCPv6_Options table.
 *
 * check_and_add_supported_dhcpv6_opts_to_sb_db() links the 'hmap_node'
 * member of each element into a temporary hmap, so the array cannot be
 * const. */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
7280
7281 static void
7282 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
7283 {
7284 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
7285 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
7286 sizeof(supported_dhcp_opts[0])); i++) {
7287 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
7288 dhcp_opt_hash(supported_dhcp_opts[i].name));
7289 }
7290
7291 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
7292 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
7293 struct gen_opts_map *dhcp_opt =
7294 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
7295 if (dhcp_opt) {
7296 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
7297 } else {
7298 sbrec_dhcp_options_delete(opt_row);
7299 }
7300 }
7301
7302 struct gen_opts_map *opt;
7303 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
7304 struct sbrec_dhcp_options *sbrec_dhcp_option =
7305 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
7306 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
7307 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
7308 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
7309 }
7310
7311 hmap_destroy(&dhcp_opts_to_add);
7312 }
7313
7314 static void
7315 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
7316 {
7317 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
7318 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
7319 sizeof(supported_dhcpv6_opts[0])); i++) {
7320 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
7321 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
7322 }
7323
7324 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
7325 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
7326 struct gen_opts_map *dhcp_opt =
7327 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
7328 if (dhcp_opt) {
7329 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
7330 } else {
7331 sbrec_dhcpv6_options_delete(opt_row);
7332 }
7333 }
7334
7335 struct gen_opts_map *opt;
7336 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
7337 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
7338 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
7339 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
7340 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
7341 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
7342 }
7343
7344 hmap_destroy(&dhcpv6_opts_to_add);
7345 }
7346
/* Column lists referenced by the 'rbac_perm_cfg' table: for each southbound
 * table, the '*_auth' array supplies the permission's "authorization" set
 * and the '*_update' array supplies its "update" set. */

/* Chassis. */
static const char *rbac_chassis_auth[] = { "name" };
static const char *rbac_chassis_update[] = {
    "nb_cfg", "external_ids", "encaps", "vtep_logical_switches"
};

/* Encap. */
static const char *rbac_encap_auth[] = { "chassis_name" };
static const char *rbac_encap_update[] = { "type", "options", "ip" };

/* Port_Binding. */
static const char *rbac_port_binding_auth[] = { "" };
static const char *rbac_port_binding_update[] = { "chassis" };

/* MAC_Binding. */
static const char *rbac_mac_binding_auth[] = { "" };
static const char *rbac_mac_binding_update[] = {
    "logical_port", "ip", "mac", "datapath"
};
7366
7367 static struct rbac_perm_cfg {
7368 const char *table;
7369 const char **auth;
7370 int n_auth;
7371 bool insdel;
7372 const char **update;
7373 int n_update;
7374 const struct sbrec_rbac_permission *row;
7375 } rbac_perm_cfg[] = {
7376 {
7377 .table = "Chassis",
7378 .auth = rbac_chassis_auth,
7379 .n_auth = ARRAY_SIZE(rbac_chassis_auth),
7380 .insdel = true,
7381 .update = rbac_chassis_update,
7382 .n_update = ARRAY_SIZE(rbac_chassis_update),
7383 .row = NULL
7384 },{
7385 .table = "Encap",
7386 .auth = rbac_encap_auth,
7387 .n_auth = ARRAY_SIZE(rbac_encap_auth),
7388 .insdel = true,
7389 .update = rbac_encap_update,
7390 .n_update = ARRAY_SIZE(rbac_encap_update),
7391 .row = NULL
7392 },{
7393 .table = "Port_Binding",
7394 .auth = rbac_port_binding_auth,
7395 .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
7396 .insdel = false,
7397 .update = rbac_port_binding_update,
7398 .n_update = ARRAY_SIZE(rbac_port_binding_update),
7399 .row = NULL
7400 },{
7401 .table = "MAC_Binding",
7402 .auth = rbac_mac_binding_auth,
7403 .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
7404 .insdel = true,
7405 .update = rbac_mac_binding_update,
7406 .n_update = ARRAY_SIZE(rbac_mac_binding_update),
7407 .row = NULL
7408 },{
7409 .table = NULL,
7410 .auth = NULL,
7411 .n_auth = 0,
7412 .insdel = false,
7413 .update = NULL,
7414 .n_update = 0,
7415 .row = NULL
7416 }
7417 };
7418
7419 static bool
7420 ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
7421 {
7422 struct rbac_perm_cfg *pcfg;
7423 int i, j, n_found;
7424
7425 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7426 if (!strcmp(perm->table, pcfg->table)) {
7427 break;
7428 }
7429 }
7430 if (!pcfg->table) {
7431 return false;
7432 }
7433 if (perm->n_authorization != pcfg->n_auth ||
7434 perm->n_update != pcfg->n_update) {
7435 return false;
7436 }
7437 if (perm->insert_delete != pcfg->insdel) {
7438 return false;
7439 }
7440 /* verify perm->authorization vs. pcfg->auth */
7441 n_found = 0;
7442 for (i = 0; i < pcfg->n_auth; i++) {
7443 for (j = 0; j < perm->n_authorization; j++) {
7444 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
7445 n_found++;
7446 break;
7447 }
7448 }
7449 }
7450 if (n_found != pcfg->n_auth) {
7451 return false;
7452 }
7453
7454 /* verify perm->update vs. pcfg->update */
7455 n_found = 0;
7456 for (i = 0; i < pcfg->n_update; i++) {
7457 for (j = 0; j < perm->n_update; j++) {
7458 if (!strcmp(pcfg->update[i], perm->update[j])) {
7459 n_found++;
7460 break;
7461 }
7462 }
7463 }
7464 if (n_found != pcfg->n_update) {
7465 return false;
7466 }
7467
7468 /* Success, db state matches expected state */
7469 pcfg->row = perm;
7470 return true;
7471 }
7472
7473 static void
7474 ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
7475 struct northd_context *ctx,
7476 const struct sbrec_rbac_role *rbac_role)
7477 {
7478 struct sbrec_rbac_permission *rbac_perm;
7479
7480 rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
7481 sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
7482 sbrec_rbac_permission_set_authorization(rbac_perm,
7483 pcfg->auth,
7484 pcfg->n_auth);
7485 sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
7486 sbrec_rbac_permission_set_update(rbac_perm,
7487 pcfg->update,
7488 pcfg->n_update);
7489 sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
7490 rbac_perm);
7491 }
7492
7493 static void
7494 check_and_update_rbac(struct northd_context *ctx)
7495 {
7496 const struct sbrec_rbac_role *rbac_role = NULL;
7497 const struct sbrec_rbac_permission *perm_row, *perm_next;
7498 const struct sbrec_rbac_role *role_row, *role_row_next;
7499 struct rbac_perm_cfg *pcfg;
7500
7501 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7502 pcfg->row = NULL;
7503 }
7504
7505 SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
7506 if (!ovn_rbac_validate_perm(perm_row)) {
7507 sbrec_rbac_permission_delete(perm_row);
7508 }
7509 }
7510 SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
7511 if (strcmp(role_row->name, "ovn-controller")) {
7512 sbrec_rbac_role_delete(role_row);
7513 } else {
7514 rbac_role = role_row;
7515 }
7516 }
7517
7518 if (!rbac_role) {
7519 rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
7520 sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
7521 }
7522
7523 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7524 if (!pcfg->row) {
7525 ovn_rbac_create_perm(pcfg, ctx, rbac_role);
7526 }
7527 }
7528 }
7529
7530 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
7531 static void
7532 update_northbound_cfg(struct northd_context *ctx,
7533 struct ovsdb_idl_loop *sb_loop)
7534 {
7535 /* Update northbound sb_cfg if appropriate. */
7536 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
7537 int64_t sb_cfg = sb_loop->cur_cfg;
7538 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
7539 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
7540 }
7541
7542 /* Update northbound hv_cfg if appropriate. */
7543 if (nbg) {
7544 /* Find minimum nb_cfg among all chassis. */
7545 const struct sbrec_chassis *chassis;
7546 int64_t hv_cfg = nbg->nb_cfg;
7547 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
7548 if (chassis->nb_cfg < hv_cfg) {
7549 hv_cfg = chassis->nb_cfg;
7550 }
7551 }
7552
7553 /* Update hv_cfg. */
7554 if (nbg->hv_cfg != hv_cfg) {
7555 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
7556 }
7557 }
7558 }
7559
7560 /* Handle a fairly small set of changes in the southbound database. */
7561 static void
7562 ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
7563 {
7564 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
7565 return;
7566 }
7567
7568 update_logical_port_status(ctx);
7569 update_northbound_cfg(ctx, sb_loop);
7570 }
7571 \f
7572 static void
7573 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
7574 {
7575 enum {
7576 DAEMON_OPTION_ENUMS,
7577 VLOG_OPTION_ENUMS,
7578 SSL_OPTION_ENUMS,
7579 };
7580 static const struct option long_options[] = {
7581 {"ovnsb-db", required_argument, NULL, 'd'},
7582 {"ovnnb-db", required_argument, NULL, 'D'},
7583 {"unixctl", required_argument, NULL, 'u'},
7584 {"help", no_argument, NULL, 'h'},
7585 {"options", no_argument, NULL, 'o'},
7586 {"version", no_argument, NULL, 'V'},
7587 DAEMON_LONG_OPTIONS,
7588 VLOG_LONG_OPTIONS,
7589 STREAM_SSL_LONG_OPTIONS,
7590 {NULL, 0, NULL, 0},
7591 };
7592 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
7593
7594 for (;;) {
7595 int c;
7596
7597 c = getopt_long(argc, argv, short_options, long_options, NULL);
7598 if (c == -1) {
7599 break;
7600 }
7601
7602 switch (c) {
7603 DAEMON_OPTION_HANDLERS;
7604 VLOG_OPTION_HANDLERS;
7605 STREAM_SSL_OPTION_HANDLERS;
7606
7607 case 'd':
7608 ovnsb_db = optarg;
7609 break;
7610
7611 case 'D':
7612 ovnnb_db = optarg;
7613 break;
7614
7615 case 'u':
7616 unixctl_path = optarg;
7617 break;
7618
7619 case 'h':
7620 usage();
7621 exit(EXIT_SUCCESS);
7622
7623 case 'o':
7624 ovs_cmdl_print_options(long_options);
7625 exit(EXIT_SUCCESS);
7626
7627 case 'V':
7628 ovs_print_version(0, 0);
7629 exit(EXIT_SUCCESS);
7630
7631 default:
7632 break;
7633 }
7634 }
7635
7636 if (!ovnsb_db) {
7637 ovnsb_db = default_sb_db();
7638 }
7639
7640 if (!ovnnb_db) {
7641 ovnnb_db = default_nb_db();
7642 }
7643
7644 free(short_options);
7645 }
7646
/* Adds 'column' to 'idl' with ovsdb_idl_add_column() and then calls
 * ovsdb_idl_omit_alert() for it.
 *
 * NOTE(review): presumably this keeps the column replicated while changes to
 * it do not wake up the main loop -- confirm against the ovsdb-idl
 * documentation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
7654
7655 int
7656 main(int argc, char *argv[])
7657 {
7658 int res = EXIT_SUCCESS;
7659 struct unixctl_server *unixctl;
7660 int retval;
7661 bool exiting;
7662
7663 fatal_ignore_sigpipe();
7664 ovs_cmdl_proctitle_init(argc, argv);
7665 set_program_name(argv[0]);
7666 service_start(&argc, &argv);
7667 parse_options(argc, argv);
7668
7669 daemonize_start(false);
7670
7671 retval = unixctl_server_create(unixctl_path, &unixctl);
7672 if (retval) {
7673 exit(EXIT_FAILURE);
7674 }
7675 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
7676
7677 daemonize_complete();
7678
7679 /* We want to detect (almost) all changes to the ovn-nb db. */
7680 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
7681 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
7682 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
7683 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);
7684
7685 /* We want to detect only selected changes to the ovn-sb db. */
7686 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
7687 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
7688
7689 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
7690 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);
7691 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_options);
7692
7693 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
7694 add_column_noalert(ovnsb_idl_loop.idl,
7695 &sbrec_logical_flow_col_logical_datapath);
7696 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
7697 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
7698 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
7699 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
7700 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
7701
7702 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
7703 add_column_noalert(ovnsb_idl_loop.idl,
7704 &sbrec_multicast_group_col_datapath);
7705 add_column_noalert(ovnsb_idl_loop.idl,
7706 &sbrec_multicast_group_col_tunnel_key);
7707 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
7708 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
7709
7710 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
7711 add_column_noalert(ovnsb_idl_loop.idl,
7712 &sbrec_datapath_binding_col_tunnel_key);
7713 add_column_noalert(ovnsb_idl_loop.idl,
7714 &sbrec_datapath_binding_col_external_ids);
7715
7716 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
7717 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
7718 add_column_noalert(ovnsb_idl_loop.idl,
7719 &sbrec_port_binding_col_logical_port);
7720 add_column_noalert(ovnsb_idl_loop.idl,
7721 &sbrec_port_binding_col_tunnel_key);
7722 add_column_noalert(ovnsb_idl_loop.idl,
7723 &sbrec_port_binding_col_parent_port);
7724 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
7725 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
7726 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
7727 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
7728 add_column_noalert(ovnsb_idl_loop.idl,
7729 &sbrec_port_binding_col_nat_addresses);
7730 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
7731 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7732 &sbrec_port_binding_col_gateway_chassis);
7733 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7734 &sbrec_gateway_chassis_col_chassis);
7735 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
7736 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7737 &sbrec_gateway_chassis_col_priority);
7738 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7739 &sbrec_gateway_chassis_col_external_ids);
7740 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7741 &sbrec_gateway_chassis_col_options);
7742 add_column_noalert(ovnsb_idl_loop.idl,
7743 &sbrec_port_binding_col_external_ids);
7744 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
7745 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
7746 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
7747 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
7748 add_column_noalert(ovnsb_idl_loop.idl,
7749 &sbrec_mac_binding_col_logical_port);
7750 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
7751 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
7752 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
7753 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
7754 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
7755 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
7756 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
7757 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
7758 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
7759 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
7760 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
7761 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_group);
7762 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_name);
7763 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_ports);
7764
7765 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
7766 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
7767 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
7768 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);
7769
7770 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
7771 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
7772 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);
7773
7774 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
7775 add_column_noalert(ovnsb_idl_loop.idl,
7776 &sbrec_rbac_permission_col_table);
7777 add_column_noalert(ovnsb_idl_loop.idl,
7778 &sbrec_rbac_permission_col_authorization);
7779 add_column_noalert(ovnsb_idl_loop.idl,
7780 &sbrec_rbac_permission_col_insert_delete);
7781 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);
7782
7783 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_meter);
7784 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_name);
7785 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_unit);
7786 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_bands);
7787
7788 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_meter_band);
7789 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_action);
7790 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_rate);
7791 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_burst_size);
7792
7793 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
7794 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
7795 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
7796
7797 struct ovsdb_idl_index *sbrec_chassis_by_name
7798 = chassis_index_create(ovnsb_idl_loop.idl);
7799
7800 /* Ensure that only a single ovn-northd is active in the deployment by
7801 * acquiring a lock called "ovn_northd" on the southbound database
7802 * and then only performing DB transactions if the lock is held. */
7803 ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
7804 bool had_lock = false;
7805
7806 /* Main loop. */
7807 exiting = false;
7808 while (!exiting) {
7809 struct northd_context ctx = {
7810 .ovnnb_idl = ovnnb_idl_loop.idl,
7811 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
7812 .ovnsb_idl = ovnsb_idl_loop.idl,
7813 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
7814 };
7815
7816 if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7817 VLOG_INFO("ovn-northd lock acquired. "
7818 "This ovn-northd instance is now active.");
7819 had_lock = true;
7820 } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7821 VLOG_INFO("ovn-northd lock lost. "
7822 "This ovn-northd instance is now on standby.");
7823 had_lock = false;
7824 }
7825
7826 if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7827 ovnnb_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
7828 ovnsb_db_run(&ctx, &ovnsb_idl_loop);
7829 if (ctx.ovnsb_txn) {
7830 check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
7831 check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
7832 check_and_update_rbac(&ctx);
7833 }
7834 }
7835
7836 unixctl_server_run(unixctl);
7837 unixctl_server_wait(unixctl);
7838 if (exiting) {
7839 poll_immediate_wake();
7840 }
7841 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
7842 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
7843
7844 poll_block();
7845 if (should_service_stop()) {
7846 exiting = true;
7847 }
7848 }
7849
7850 unixctl_server_destroy(unixctl);
7851 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
7852 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
7853 service_stop();
7854
7855 exit(res);
7856 }
7857
7858 static void
7859 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
7860 const char *argv[] OVS_UNUSED, void *exiting_)
7861 {
7862 bool *exiting = exiting_;
7863 *exiting = true;
7864
7865 unixctl_command_reply(conn, NULL);
7866 }