]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
ovn: Support address sets generated from port groups
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
38 #include "packets.h"
39 #include "openvswitch/poll-loop.h"
40 #include "smap.h"
41 #include "sset.h"
42 #include "stream.h"
43 #include "stream-ssl.h"
44 #include "unixctl.h"
45 #include "util.h"
46 #include "uuid.h"
47 #include "openvswitch/vlog.h"
48
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* Handler for the "exit" unixctl command (defined further down in this
 * file -- not visible in this chunk). */
static unixctl_cb_func ovn_northd_exit;

/* IDL connections and the currently open transactions (if any) for the
 * OVN northbound and southbound databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;
    struct ovsdb_idl *ovnsb_idl;
    struct ovsdb_idl_txn *ovnnb_txn;
    struct ovsdb_idl_txn *ovnsb_txn;
};

/* Database connection strings and the unixctl socket override;
 * presumably filled in from the command line -- TODO confirm against the
 * option parser elsewhere in this file. */
static const char *ovnnb_db;
static const char *ovnsb_db;
static const char *unixctl_path;

/* Fixed 0a:00:00:00:00:00 OUI-style prefix and the 24-bit suffix space
 * used for dynamically allocated MAC addresses (see ipam_get_unused_mac()). */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);

/* 4096 == 2^12; presumably sized to the 802.1Q VLAN ID space -- confirm. */
#define MAX_OVN_TAGS 4096
72 \f
/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
86
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Encoding: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table number.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
93
/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * PIPELINE_STAGES is an X-macro table: it is expanded here to declare the
 * enum values and again in ovn_stage_to_str() and
 * ovn_stage_to_datapath_type().  Add or renumber stages only by editing the
 * table, so all expansions stay in sync. */
enum ovn_stage {
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")     \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")         \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")          \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")    \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")             \
    PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,       7, "ls_in_qos_mark")        \
    PIPELINE_STAGE(SWITCH, IN,  QOS_METER,      8, "ls_in_qos_meter")       \
    PIPELINE_STAGE(SWITCH, IN,  LB,             9, "ls_in_lb")              \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,      10, "ls_in_stateful")        \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,    11, "ls_in_arp_rsp")         \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  12, "ls_in_dhcp_options")    \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 13, "ls_in_dhcp_response")   \
    PIPELINE_STAGE(SWITCH, IN,  DNS_LOOKUP,    14, "ls_in_dns_lookup")      \
    PIPELINE_STAGE(SWITCH, IN,  DNS_RESPONSE,  15, "ls_in_dns_response")    \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       16, "ls_in_l2_lkup")         \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")    \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
    PIPELINE_STAGE(SWITCH, OUT, LB,           3, "ls_out_lb")         \
    PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")        \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK,     5, "ls_out_qos_mark")   \
    PIPELINE_STAGE(SWITCH, OUT, QOS_METER,    6, "ls_out_qos_meter")  \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     7, "ls_out_stateful")   \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  8, "ls_out_port_sec_ip") \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  9, "ls_out_port_sec_l2") \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,      0, "lr_in_admission")    \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,       1, "lr_in_ip_input")     \
    PIPELINE_STAGE(ROUTER, IN,  DEFRAG,         2, "lr_in_defrag")       \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,         3, "lr_in_unsnat")       \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,           4, "lr_in_dnat")         \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_OPTIONS,  5, "lr_in_nd_ra_options") \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,     7, "lr_in_ip_routing")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,    8, "lr_in_arp_resolve")  \
    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,    9, "lr_in_gw_redirect")  \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,   10, "lr_in_arp_request")  \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, UNDNAT,    0, "lr_out_undnat")        \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,      1, "lr_out_snat")          \
    PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP,  2, "lr_out_egr_loop")      \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,  3, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
159
/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* Register definitions specific to switches.  Each is a single bit of
 * logical register 0, used to pass state between pipeline stages. */
#define REGBIT_CONNTRACK_DEFRAG  "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT  "reg0[1]"
#define REGBIT_CONNTRACK_NAT     "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT  "reg0[3]"
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
#define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Indicate that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets with source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
181
182 /* Returns an "enum ovn_stage" built from the arguments. */
183 static enum ovn_stage
184 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
185 uint8_t table)
186 {
187 return OVN_STAGE_BUILD(dp_type, pipeline, table);
188 }
189
190 /* Returns the pipeline to which 'stage' belongs. */
191 static enum ovn_pipeline
192 ovn_stage_get_pipeline(enum ovn_stage stage)
193 {
194 return (stage >> 8) & 1;
195 }
196
197 /* Returns the pipeline name to which 'stage' belongs. */
198 static const char *
199 ovn_stage_get_pipeline_name(enum ovn_stage stage)
200 {
201 return ovn_stage_get_pipeline(stage) == P_IN ? "ingress" : "egress";
202 }
203
204 /* Returns the table to which 'stage' belongs. */
205 static uint8_t
206 ovn_stage_get_table(enum ovn_stage stage)
207 {
208 return stage & 0xff;
209 }
210
/* Returns a string name for 'stage', e.g. "ls_in_acl".  The cases are
 * generated from the PIPELINE_STAGES X-macro table, so they track the enum
 * automatically. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
223
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added.  The cases are generated from the PIPELINE_STAGES X-macro table;
 * an unknown stage is a programming error (OVS_NOT_REACHED aborts). */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
    default: OVS_NOT_REACHED();
    }
}
237 \f
/* Prints command-line usage for ovn-northd on stdout, followed by the
 * shared daemon, logging and database-stream option help. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  --unixctl=SOCKET          override default control socket name\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
259 \f
/* One allocated tunnel ID.  Stored in an hmap hashed on 'tnlid'; used for
 * both datapath and port tunnel keys. */
struct tnlid_node {
    struct hmap_node hmap_node;
    uint32_t tnlid;
};
264
/* Frees every tnlid_node in 'tnlids' and destroys the hmap itself. */
static void
destroy_tnlids(struct hmap *tnlids)
{
    struct tnlid_node *node;
    HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
        free(node);
    }
    hmap_destroy(tnlids);
}
274
275 static void
276 add_tnlid(struct hmap *set, uint32_t tnlid)
277 {
278 struct tnlid_node *node = xmalloc(sizeof *node);
279 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
280 node->tnlid = tnlid;
281 }
282
/* Returns true if 'tnlid' has already been recorded in 'set'. */
static bool
tnlid_in_use(const struct hmap *set, uint32_t tnlid)
{
    const struct tnlid_node *node;
    HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
        /* Bucket iteration may visit hash collisions; compare the value. */
        if (node->tnlid == tnlid) {
            return true;
        }
    }
    return false;
}
294
/* Returns the tunnel ID that follows 'tnlid', wrapping around from 'max'
 * back to 1 (0 is never produced; it means "allocation failed" to callers). */
static uint32_t
next_tnlid(uint32_t tnlid, uint32_t max)
{
    uint32_t next = tnlid + 1;
    if (next > max) {
        next = 1;
    }
    return next;
}
300
/* Allocates an unused tunnel ID in [1, max] from 'set', records it in 'set',
 * and returns it.  'hint' holds the most recently allocated ID and is
 * updated on success, so successive calls scan forward instead of restarting
 * from 1.  Returns 0 (and logs, rate-limited) if every ID is taken.  Note
 * the scan stops when it wraps back to *hint, so the hint value itself is
 * never re-tried -- it is assumed to still be in use. */
static uint32_t
allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
               uint32_t *hint)
{
    for (uint32_t tnlid = next_tnlid(*hint, max); tnlid != *hint;
         tnlid = next_tnlid(tnlid, max)) {
        if (!tnlid_in_use(set, tnlid)) {
            add_tnlid(set, tnlid);
            *hint = tnlid;
            return tnlid;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
    return 0;
}
318 \f
/* One allocated qdisc queue ID on a particular chassis.  Stored in an hmap
 * hashed on 'chassis_uuid'; the (chassis_uuid, queue_id) pair is unique. */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;
    uint32_t queue_id;
    struct uuid chassis_uuid;
};
324
/* Frees every queue-ID record in 'set' and destroys the hmap itself. */
static void
destroy_chassis_queues(struct hmap *set)
{
    struct ovn_chassis_qdisc_queues *node;
    HMAP_FOR_EACH_POP (node, key_node, set) {
        free(node);
    }
    hmap_destroy(set);
}
334
335 static void
336 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
337 uint32_t queue_id)
338 {
339 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
340 node->queue_id = queue_id;
341 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
342 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
343 }
344
/* Returns true if 'queue_id' is already allocated on the chassis identified
 * by 'chassis_uuid'. */
static bool
chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
                       uint32_t queue_id)
{
    const struct ovn_chassis_qdisc_queues *node;
    HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
        if (uuid_equals(chassis_uuid, &node->chassis_uuid)
            && node->queue_id == queue_id) {
            return true;
        }
    }
    return false;
}
358
359 static uint32_t
360 allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
361 {
362 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
363 queue_id <= QDISC_MAX_QUEUE_ID;
364 queue_id++) {
365 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
366 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
367 return queue_id;
368 }
369 }
370
371 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
372 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
373 return 0;
374 }
375
376 static void
377 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
378 uint32_t queue_id)
379 {
380 struct ovn_chassis_qdisc_queues *node;
381 HMAP_FOR_EACH_WITH_HASH (node, key_node,
382 uuid_hash(&chassis->header_.uuid),
383 set) {
384 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
385 && node->queue_id == queue_id) {
386 hmap_remove(set, &node->key_node);
387 break;
388 }
389 }
390 }
391
392 static inline bool
393 port_has_qos_params(const struct smap *opts)
394 {
395 return (smap_get(opts, "qos_max_rate") ||
396 smap_get(opts, "qos_burst"));
397 }
398 \f
399
/* Per-logical-switch IPAM state, derived from the switch's
 * other_config:subnet and other_config:ipv6_prefix (see
 * init_ipam_info_for_datapath()). */
struct ipam_info {
    uint32_t start_ipv4;            /* First allocatable IPv4 (host order). */
    size_t total_ipv4s;             /* Number of addresses in the subnet. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
    bool ipv6_prefix_set;           /* True if 'ipv6_prefix' parsed OK. */
    struct in6_addr ipv6_prefix;
};
407
/* A logical datapath: the join of a northbound logical switch or router with
 * its southbound Datapath_Binding row.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;  /* Ports of type "router" on this
                                      * switch. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;     /* Hint for allocate_tnlid(). */

    bool has_unknown;           /* True if any port has "unknown"
                                 * addresses. */

    /* IPAM data.  NULL unless the switch configures a subnet or IPv6
     * prefix. */
    struct ipam_info *ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
    struct ovn_port *localnet_port;
};
442
/* One entry in the global 'macam' table, hashed on the 64-bit form of the
 * MAC address. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr; /* Allocated MAC address. */
};
447
/* Empties 'macam_' of all allocated-MAC records, freeing each one.  The hmap
 * itself is left initialized for reuse. */
static void
cleanup_macam(struct hmap *macam_)
{
    struct macam_node *node;
    HMAP_FOR_EACH_POP (node, hmap_node, macam_) {
        free(node);
    }
}
456
457 static struct ovn_datapath *
458 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
459 const struct nbrec_logical_switch *nbs,
460 const struct nbrec_logical_router *nbr,
461 const struct sbrec_datapath_binding *sb)
462 {
463 struct ovn_datapath *od = xzalloc(sizeof *od);
464 od->key = *key;
465 od->sb = sb;
466 od->nbs = nbs;
467 od->nbr = nbr;
468 hmap_init(&od->port_tnlids);
469 od->port_key_hint = 0;
470 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
471 return od;
472 }
473
/* Removes 'od' from 'datapaths' and frees it along with everything it owns
 * (port tunnel IDs, IPAM state, router-port array).  NULL-safe. */
static void
ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
{
    if (od) {
        /* Don't remove od->list.  It is used within build_datapaths() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(datapaths, &od->key_node);
        destroy_tnlids(&od->port_tnlids);
        if (od->ipam_info) {
            bitmap_free(od->ipam_info->allocated_ipv4s);
            free(od->ipam_info);
        }
        free(od->router_ports);
        free(od);
    }
}
491
492 /* Returns 'od''s datapath type. */
493 static enum ovn_datapath_type
494 ovn_datapath_get_type(const struct ovn_datapath *od)
495 {
496 return od->nbs ? DP_SWITCH : DP_ROUTER;
497 }
498
/* Returns the datapath in 'datapaths' whose key equals 'uuid', or NULL if
 * there is none. */
static struct ovn_datapath *
ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
{
    struct ovn_datapath *od;

    HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
        if (uuid_equals(uuid, &od->key)) {
            return od;
        }
    }
    return NULL;
}
511
512 static struct ovn_datapath *
513 ovn_datapath_from_sbrec(struct hmap *datapaths,
514 const struct sbrec_datapath_binding *sb)
515 {
516 struct uuid key;
517
518 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
519 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
520 return NULL;
521 }
522 return ovn_datapath_find(datapaths, &key);
523 }
524
525 static bool
526 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
527 {
528 return !lrouter->enabled || *lrouter->enabled;
529 }
530
/* Initializes od->ipam_info from the logical switch's other_config keys
 * "subnet", "ipv6_prefix" and "exclude_ips".  A no-op for routers (no nbs).
 * od->ipam_info stays NULL when neither a subnet nor an IPv6 prefix is
 * configured. */
static void
init_ipam_info_for_datapath(struct ovn_datapath *od)
{
    if (!od->nbs) {
        return;
    }

    const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
    const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");

    if (ipv6_prefix) {
        od->ipam_info = xzalloc(sizeof *od->ipam_info);
        od->ipam_info->ipv6_prefix_set = ipv6_parse(
            ipv6_prefix, &od->ipam_info->ipv6_prefix);
    }

    if (!subnet_str) {
        return;
    }

    ovs_be32 subnet, mask;
    char *error = ip_parse_masked(subnet_str, &subnet, &mask);
    /* A /32 (OVS_BE32_MAX) or a non-contiguous mask leaves nothing to
     * allocate, so treat those as bad input too. */
    if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl
            = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
        free(error);
        return;
    }

    if (!od->ipam_info) {
        od->ipam_info = xzalloc(sizeof *od->ipam_info);
    }
    /* Skip the network address itself: allocation starts at network + 1. */
    od->ipam_info->start_ipv4 = ntohl(subnet) + 1;
    od->ipam_info->total_ipv4s = ~ntohl(mask);
    od->ipam_info->allocated_ipv4s =
        bitmap_allocate(od->ipam_info->total_ipv4s);

    /* Mark first IP as taken */
    bitmap_set1(od->ipam_info->allocated_ipv4s, 0);

    /* Check if there are any reserved IPs (list) to be excluded from IPAM */
    const char *exclude_ip_list = smap_get(&od->nbs->other_config,
                                           "exclude_ips");
    if (!exclude_ip_list) {
        return;
    }

    struct lexer lexer;
    lexer_init(&lexer, exclude_ip_list);
    /* exclude_ip_list could be in the format -
     * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
     */
    lexer_get(&lexer);
    while (lexer.token.type != LEX_T_END) {
        if (lexer.token.type != LEX_T_INTEGER) {
            lexer_syntax_error(&lexer, "expecting address");
            break;
        }
        uint32_t start = ntohl(lexer.token.value.ipv4);
        lexer_get(&lexer);

        /* A bare address excludes just itself ('end' is exclusive);
         * "a..b" excludes the whole inclusive range. */
        uint32_t end = start + 1;
        if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
            if (lexer.token.type != LEX_T_INTEGER) {
                lexer_syntax_error(&lexer, "expecting address range");
                break;
            }
            end = ntohl(lexer.token.value.ipv4) + 1;
            lexer_get(&lexer);
        }

        /* Clamp start...end to fit the subnet. */
        start = MAX(od->ipam_info->start_ipv4, start);
        end = MIN(od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s, end);
        if (end > start) {
            bitmap_set_multiple(od->ipam_info->allocated_ipv4s,
                                start - od->ipam_info->start_ipv4,
                                end - start, 1);
        } else {
            lexer_error(&lexer, "excluded addresses not in subnet");
        }
    }
    if (lexer.error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
                     UUID_ARGS(&od->key), lexer.error);
    }
    lexer_destroy(&lexer);
}
621
/* Rewrites od->sb's external-ids to identify the northbound record it came
 * from: its UUID under "logical-switch"/"logical-router", its name under
 * "name", and any Neutron name under "name2". */
static void
ovn_datapath_update_external_ids(struct ovn_datapath *od)
{
    /* Get the logical-switch or logical-router UUID to set in
     * external-ids. */
    char uuid_s[UUID_LEN + 1];
    sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
    const char *key = od->nbs ? "logical-switch" : "logical-router";

    /* Get names to set in external-ids. */
    const char *name = od->nbs ? od->nbs->name : od->nbr->name;
    const char *name2 = (od->nbs
                         ? smap_get(&od->nbs->external_ids,
                                    "neutron:network_name")
                         : smap_get(&od->nbr->external_ids,
                                    "neutron:router_name"));

    /* Set external-ids. */
    struct smap ids = SMAP_INITIALIZER(&ids);
    smap_add(&ids, key, uuid_s);
    smap_add(&ids, "name", name);
    if (name2 && name2[0]) {
        smap_add(&ids, "name2", name2);
    }
    sbrec_datapath_binding_set_external_ids(od->sb, &ids);
    smap_destroy(&ids);
}
649
/* Pairs southbound Datapath_Binding rows with northbound logical switches
 * and routers, creating an ovn_datapath for each, and sorts them into three
 * lists: 'sb_only' (stale southbound rows), 'nb_only' (northbound rows that
 * still need a binding), and 'both' (matched pairs).  Southbound rows with a
 * missing or duplicate external-ids key are deleted on the spot. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        /* Provisionally stale; moved to 'both' below if a northbound row
         * with the same UUID turns up. */
        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
            ovn_datapath_update_external_ids(od);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        init_ipam_info_for_datapath(od);
    }

    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        /* Disabled routers get no datapath (their stale binding, if any,
         * stays on 'sb_only' and is deleted by build_datapaths()). */
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
                ovn_datapath_update_external_ids(od);
            } else {
                /* Can't happen! */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
737
/* Allocates a new, unused 24-bit datapath tunnel key from 'dp_tnlids' and
 * returns it, or 0 if the key space is exhausted.  The static hint makes
 * successive allocations scan forward across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
744
745 /* Updates the southbound Datapath_Binding table so that it contains the
746 * logical switches and routers specified by the northbound database.
747 *
748 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
749 * switch and router. */
750 static void
751 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
752 {
753 struct ovs_list sb_only, nb_only, both;
754
755 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
756
757 if (!ovs_list_is_empty(&nb_only)) {
758 /* First index the in-use datapath tunnel IDs. */
759 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
760 struct ovn_datapath *od;
761 LIST_FOR_EACH (od, list, &both) {
762 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
763 }
764
765 /* Add southbound record for each unmatched northbound record. */
766 LIST_FOR_EACH (od, list, &nb_only) {
767 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
768 if (!tunnel_key) {
769 break;
770 }
771
772 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
773 ovn_datapath_update_external_ids(od);
774 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
775 }
776 destroy_tnlids(&dp_tnlids);
777 }
778
779 /* Delete southbound records without northbound matches. */
780 struct ovn_datapath *od, *next;
781 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
782 ovs_list_remove(&od->list);
783 sbrec_datapath_binding_delete(od->sb);
784 ovn_datapath_destroy(datapaths, od);
785 }
786 }
787 \f
/* A logical switch port or logical router port, joined with its southbound
 * Port_Binding row.  Exactly one of 'nbsp' and 'nbrp' is normally set. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;           /* Number of entries in lsp_addrs. */

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;            /* Number of entries in ps_addrs. */

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived; /* Indicates whether this is an additional port
                   * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
824
/* Creates a new ovn_port named 'key', associates it with the (possibly NULL)
 * northbound switch-port/router-port and southbound binding rows given, and
 * inserts it into 'ports'.  Also precomputes the JSON-quoted form of the
 * name used when emitting logical flow matches. */
static struct ovn_port *
ovn_port_create(struct hmap *ports, const char *key,
                const struct nbrec_logical_switch_port *nbsp,
                const struct nbrec_logical_router_port *nbrp,
                const struct sbrec_port_binding *sb)
{
    struct ovn_port *op = xzalloc(sizeof *op);

    struct ds json_key = DS_EMPTY_INITIALIZER;
    json_string_escape(key, &json_key);
    op->json_key = ds_steal_cstr(&json_key);

    op->key = xstrdup(key);
    op->sb = sb;
    op->nbsp = nbsp;
    op->nbrp = nbrp;
    op->derived = false;
    hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
    return op;
}
845
/* Removes 'port' from 'ports' and frees it along with all of its owned
 * address arrays and strings.  NULL-safe. */
static void
ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
{
    if (port) {
        /* Don't remove port->list.  It is used within build_ports() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(ports, &port->key_node);

        for (int i = 0; i < port->n_lsp_addrs; i++) {
            destroy_lport_addresses(&port->lsp_addrs[i]);
        }
        free(port->lsp_addrs);

        for (int i = 0; i < port->n_ps_addrs; i++) {
            destroy_lport_addresses(&port->ps_addrs[i]);
        }
        free(port->ps_addrs);

        destroy_lport_addresses(&port->lrp_networks);
        free(port->json_key);
        free(port->key);
        free(port);
    }
}
871
/* Returns the port in 'ports' whose key equals 'name', or NULL if there is
 * none. */
static struct ovn_port *
ovn_port_find(struct hmap *ports, const char *name)
{
    struct ovn_port *op;

    HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
        if (!strcmp(op->key, name)) {
            return op;
        }
    }
    return NULL;
}
884
/* Allocates a new, unused 15-bit port tunnel key within datapath 'od' and
 * returns it, or 0 if the key space is exhausted. */
static uint32_t
ovn_port_allocate_key(struct ovn_datapath *od)
{
    return allocate_tnlid(&od->port_tnlids, "port",
                          (1u << 15) - 1, &od->port_key_hint);
}
891
/* Returns the name of the chassis-redirect port derived from 'port_name',
 * i.e. "cr-<port_name>".  The caller must free the returned string. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
897
/* Returns true if '*ea' is already present in the global macam table.
 * 'mac64' must be the 64-bit form of the same address (used as the hash).
 * If 'warn' is true, a duplicate also produces a rate-limited warning. */
static bool
ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
{
    struct macam_node *macam_node;
    HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
                             &macam) {
        if (eth_addr_equals(*ea, macam_node->mac_addr)) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
                             ETH_ADDR_ARGS(macam_node->mac_addr));
            }
            return true;
        }
    }
    return false;
}
915
/* Records '*ea' in the global macam table so it will not be handed out
 * dynamically.  Only addresses under MAC_ADDR_PREFIX are tracked.  When
 * 'check' is true, an address already present is not re-inserted (and a
 * warning is logged).  NULL-safe. */
static void
ipam_insert_mac(struct eth_addr *ea, bool check)
{
    if (!ea) {
        return;
    }

    uint64_t mac64 = eth_addr_to_uint64(*ea);
    /* If the new MAC was not assigned by this address management system or
     * check is true and the new MAC is a duplicate, do not insert it into the
     * macam hmap. */
    if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
        || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
        return;
    }

    struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
    new_macam_node->mac_addr = *ea;
    hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
}
936
937 static void
938 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
939 {
940 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
941 return;
942 }
943
944 if (ip >= od->ipam_info->start_ipv4 &&
945 ip < (od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s)) {
946 bitmap_set1(od->ipam_info->allocated_ipv4s,
947 ip - od->ipam_info->start_ipv4);
948 }
949 }
950
/* Parses the logical switch port address string 'address' ("MAC [IP]...")
 * and records its MAC in the macam and its IPv4 addresses in 'od''s IPAM
 * bitmap.  The special values "unknown", "router" and dynamic address
 * specifications are skipped. */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!od->ipam_info || !od->ipam_info->allocated_ipv4s) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip);
    }

    destroy_lport_addresses(&laddrs);
}
982
/* Registers all of port 'op''s configured and dynamic addresses with the
 * MAC and IPv4 address managers.  For a router port, its IPv4 addresses are
 * registered in the IPAM of the peer switch's datapath (and only if that
 * switch has a subnet configured).  NULL-safe. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Router-port IPs only matter to IPAM when the attached switch
         * actually manages a subnet. */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
1022
1023 static uint64_t
1024 ipam_get_unused_mac(void)
1025 {
1026 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1027 static uint32_t last_mac;
1028
1029 uint64_t mac64;
1030 struct eth_addr mac;
1031 uint32_t mac_addr_suffix, i;
1032 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1033 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1034 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
1035 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1036 eth_addr_from_uint64(mac64, &mac);
1037 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
1038 last_mac = mac_addr_suffix;
1039 break;
1040 }
1041 }
1042
1043 if (i == MAC_ADDR_SPACE) {
1044 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1045 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1046 mac64 = 0;
1047 }
1048
1049 return mac64;
1050 }
1051
1052 static uint32_t
1053 ipam_get_unused_ip(struct ovn_datapath *od)
1054 {
1055 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
1056 return 0;
1057 }
1058
1059 size_t new_ip_index = bitmap_scan(od->ipam_info->allocated_ipv4s, 0, 0,
1060 od->ipam_info->total_ipv4s - 1);
1061 if (new_ip_index == od->ipam_info->total_ipv4s - 1) {
1062 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1063 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
1064 return 0;
1065 }
1066
1067 return od->ipam_info->start_ipv4 + new_ip_index;
1068 }
1069
/* Allocates dynamic addresses for logical switch port 'op' according to
 * 'addrspec', which is a "dynamic" entry from the port's addresses column
 * (either plain "dynamic" or "MAC dynamic" to pin the MAC and only
 * generate IPs).  On success, stores the result in the port's NB
 * dynamic_addresses column and returns true; returns false when nothing
 * could be generated. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec)
{
    if (!op->nbsp || !od->ipam_info) {
        return false;
    }

    /* Get or generate MAC address.  A spec of the form "xx:..:xx dynamic"
     * fixes the MAC; otherwise one is allocated from MACAM. */
    struct eth_addr mac;
    bool dynamic_mac;
    int n = 0;
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        dynamic_mac = false;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            /* MAC space exhausted; cannot allocate anything. */
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        dynamic_mac = true;
    }

    /* Generate IPv4 address, if desirable.
     * ip4 == 0 below means the IPv4 pool was exhausted. */
    bool dynamic_ip4 = od->ipam_info->allocated_ipv4s != NULL;
    uint32_t ip4 = dynamic_ip4 ? ipam_get_unused_ip(od) : 0;

    /* Generate IPv6 address, if desirable.  The IPv6 address is derived
     * from the MAC via EUI-64, so no separate pool is consumed. */
    bool dynamic_ip6 = od->ipam_info->ipv6_prefix_set;
    struct in6_addr ip6;
    if (dynamic_ip6) {
        in6_generate_eui64(mac, &od->ipam_info->ipv6_prefix, &ip6);
    }

    /* If we didn't generate anything, bail out. */
    if (!dynamic_ip4 && !dynamic_ip6) {
        return false;
    }

    /* Save the dynamic addresses as "MAC [IP4] [IP6]". */
    struct ds new_addr = DS_EMPTY_INITIALIZER;
    ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
    if (dynamic_ip4 && ip4) {
        ipam_insert_ip(od, ip4);
        ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(htonl(ip4)));
    }
    if (dynamic_ip6) {
        char ip6_s[INET6_ADDRSTRLEN + 1];
        ipv6_string_mapped(ip6_s, &ip6);
        ds_put_format(&new_addr, " %s", ip6_s);
    }
    /* NOTE(review): the second argument appears to request duplicate
     * checking only for statically-specified MACs — confirm against
     * ipam_insert_mac(). */
    ipam_insert_mac(&mac, !dynamic_mac);
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                    ds_cstr(&new_addr));
    ds_destroy(&new_addr);
    return true;
}
1129
/* Walks all logical switches with IPAM configured and resolves every
 * pending "dynamic" address request, writing results into each port's
 * dynamic_addresses column and parsed form into op->lsp_addrs. */
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Only logical switches with IPAM configured participate. */
        if (!od->nbs || !od->ipam_info) {
            continue;
        }

        struct ovn_port *op;
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            const struct nbrec_logical_switch_port *nbsp =
                od->nbs->ports[i];

            if (!nbsp) {
                continue;
            }

            op = ovn_port_find(ports, nbsp->name);
            if (!op || (op->nbsp && op->peer)) {
                /* Do not allocate addresses for logical switch ports that
                 * have a peer. */
                continue;
            }

            for (size_t j = 0; j < nbsp->n_addresses; j++) {
                /* Only the first "dynamic" entry without an existing
                 * allocation result is processed. */
                if (is_dynamic_lsp_address(nbsp->addresses[j])
                    && !nbsp->dynamic_addresses) {
                    /* NOTE(review): the extract below relies on the IDL
                     * setter inside ipam_allocate_addresses() updating
                     * nbsp->dynamic_addresses in memory — confirm. */
                    if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
                        || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "Failed to allocate address.");
                    } else {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }

            /* The port no longer requests any addresses: drop a stale
             * dynamic_addresses value. */
            if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
                nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                                NULL);
            }
        }
    }
}
1186 \f
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node;    /* In a table keyed on hash of
                                    * 'parent_name'. */
    char *parent_name;             /* Owned copy of the parent port name. */
    unsigned long *allocated_tags;  /* A bitmap to track allocated tags. */
};
1196
1197 static void
1198 tag_alloc_destroy(struct hmap *tag_alloc_table)
1199 {
1200 struct tag_alloc_node *node;
1201 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1202 bitmap_free(node->allocated_tags);
1203 free(node->parent_name);
1204 free(node);
1205 }
1206 hmap_destroy(tag_alloc_table);
1207 }
1208
1209 static struct tag_alloc_node *
1210 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1211 {
1212 /* If a node for the 'parent_name' exists, return it. */
1213 struct tag_alloc_node *tag_alloc_node;
1214 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1215 hash_string(parent_name, 0),
1216 tag_alloc_table) {
1217 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1218 return tag_alloc_node;
1219 }
1220 }
1221
1222 /* Create a new node. */
1223 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1224 tag_alloc_node->parent_name = xstrdup(parent_name);
1225 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1226 /* Tag 0 is invalid for nested containers. */
1227 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1228 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1229 hash_string(parent_name, 0));
1230
1231 return tag_alloc_node;
1232 }
1233
1234 static void
1235 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1236 const struct nbrec_logical_switch_port *nbsp)
1237 {
1238 /* Add the tags of already existing nested containers. If there is no
1239 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1240 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1241 return;
1242 }
1243
1244 struct tag_alloc_node *tag_alloc_node;
1245 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1246 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1247 }
1248
1249 static void
1250 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1251 const struct nbrec_logical_switch_port *nbsp)
1252 {
1253 if (!nbsp->tag_request) {
1254 return;
1255 }
1256
1257 if (nbsp->parent_name && nbsp->parent_name[0]
1258 && *nbsp->tag_request == 0) {
1259 /* For nested containers that need allocation, do the allocation. */
1260
1261 if (nbsp->tag) {
1262 /* This has already been allocated. */
1263 return;
1264 }
1265
1266 struct tag_alloc_node *tag_alloc_node;
1267 int64_t tag;
1268 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1269 nbsp->parent_name);
1270 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1271 if (tag == MAX_OVN_TAGS) {
1272 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1273 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1274 "parent %s", nbsp->parent_name);
1275 return;
1276 }
1277 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1278 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1279 } else if (*nbsp->tag_request != 0) {
1280 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1281 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1282 }
1283 }
1284 \f
1285
1286 /*
1287 * This function checks if the MAC in "address" parameter (if present) is
1288 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1289 * and updates it.
1290 */
1291 static void
1292 check_and_update_mac_in_dynamic_addresses(
1293 const char *address,
1294 const struct nbrec_logical_switch_port *nbsp)
1295 {
1296 if (!nbsp->dynamic_addresses) {
1297 return;
1298 }
1299 int buf_index = 0;
1300 struct eth_addr ea;
1301 if (!ovs_scan_len(address, &buf_index,
1302 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1303 return;
1304 }
1305
1306 struct eth_addr present_ea;
1307 buf_index = 0;
1308 if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index,
1309 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea))
1310 && !eth_addr_equals(ea, present_ea)) {
1311 /* MAC address has changed. Update it */
1312 char *new_addr = xasprintf(
1313 ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea),
1314 &nbsp->dynamic_addresses[buf_index]);
1315 nbrec_logical_switch_port_set_dynamic_addresses(
1316 nbsp, new_addr);
1317 free(new_addr);
1318 }
1319 }
1320
/* Builds 'ports' by joining northbound logical switch/router ports with
 * southbound Port_Binding records, partitioning them into three lists:
 *
 *   - 'sb_only': ports that exist only as SB Port_Bindings.
 *   - 'nb_only': ports that exist only in the NB database.
 *   - 'both': ports present in both databases.
 *
 * Along the way, parses each port's address columns into op->lsp_addrs /
 * op->ps_addrs / op->lrp_networks, records existing container tags in
 * 'tag_alloc_table' and qdisc queue ids in 'chassis_qdisc_queues',
 * creates "derived" chassis-redirect ports for gateway router ports, and
 * links router ports with their peers. */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start every SB Port_Binding in 'sb_only'; ports matched against NB
     * records below migrate to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Remember a queue id already assigned on the port's
                     * chassis so it is not handed out again. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                if (!strcmp(nbsp->type, "localnet")) {
                    od->localnet_port = op;
                }

                /* Parse the addresses column.  "unknown" is skipped;
                 * "router" entries are filled in from the peer in the
                 * second pass below. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")
                        || !strcmp(nbsp->addresses[j], "router")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            check_and_update_mac_in_dynamic_addresses(
                                nbsp->addresses[j], nbsp);
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            /* Dynamic address not allocated yet; handled
                             * later by build_ipam(). */
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the port_security column. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* Router ports without any network are ignored. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);

                /* A distributed gateway port gets an extra "derived"
                 * chassis-redirect port. */
                const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                        "redirect-chassis");
                if (redirect_chassis || op->nbrp->n_gateway_chassis) {
                    /* Additional "derived" ovn_port crp represents the
                     * instance of op on the "redirect-chassis". */
                    const char *gw_chassis = smap_get(&op->od->nbr->options,
                                                   "chassis");
                    if (gw_chassis) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: "
                                     "redirect-chassis configured on port %s "
                                     "on L3 gateway router", nbrp->name);
                        continue;
                    }
                    if (od->l3dgw_port || od->l3redirect_port) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
                                     "with redirect-chassis on same logical "
                                     "router %s", od->nbr->name);
                        continue;
                    }

                    char *redirect_name = chassis_redirect_name(nbrp->name);
                    struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                    if (crp) {
                        crp->derived = true;
                        crp->nbrp = nbrp;
                        ovs_list_remove(&crp->list);
                        ovs_list_push_back(both, &crp->list);
                    } else {
                        crp = ovn_port_create(ports, redirect_name,
                                              NULL, nbrp, NULL);
                        crp->derived = true;
                        ovs_list_push_back(nb_only, &crp->list);
                    }
                    crp->od = od;
                    free(redirect_name);

                    /* Set l3dgw_port and l3redirect_port in od, for later
                     * use during flow creation. */
                    od->l3dgw_port = op;
                    od->l3redirect_port = crp;
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;

            /* Fill op->lsp_addrs for op->nbsp->addresses[] with
             * contents "router", which was skipped in the loop above. */
            for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
                if (!strcmp(op->nbsp->addresses[j], "router")) {
                    if (extract_lrp_networks(peer->nbrp,
                                             &op->lsp_addrs[op->n_lsp_addrs])) {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }
        } else if (op->nbrp && op->nbrp->peer && !op->derived) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1582
1583 static void
1584 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1585 uint16_t *port, int *addr_family);
1586
1587 static void
1588 get_router_load_balancer_ips(const struct ovn_datapath *od,
1589 struct sset *all_ips, int *addr_family)
1590 {
1591 if (!od->nbr) {
1592 return;
1593 }
1594
1595 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1596 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1597 struct smap *vips = &lb->vips;
1598 struct smap_node *node;
1599
1600 SMAP_FOR_EACH (node, vips) {
1601 /* node->key contains IP:port or just IP. */
1602 char *ip_address = NULL;
1603 uint16_t port;
1604
1605 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
1606 addr_family);
1607 if (!ip_address) {
1608 continue;
1609 }
1610
1611 if (!sset_contains(all_ips, ip_address)) {
1612 sset_add(all_ips, ip_address);
1613 }
1614
1615 free(ip_address);
1616 }
1617 }
1618 }
1619
1620 /* Returns an array of strings, each consisting of a MAC address followed
1621 * by one or more IP addresses, and if the port is a distributed gateway
1622 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1623 * LPORT_NAME is the name of the L3 redirect port or the name of the
1624 * logical_port specified in a NAT rule. These strings include the
1625 * external IP addresses of all NAT rules defined on that router, and all
1626 * of the IP addresses used in load balancer VIPs defined on that router.
1627 *
1628 * The caller must free each of the n returned strings with free(),
1629 * and must free the returned array when it is no longer needed. */
1630 static char **
1631 get_nat_addresses(const struct ovn_port *op, size_t *n)
1632 {
1633 size_t n_nats = 0;
1634 struct eth_addr mac;
1635 if (!op->nbrp || !op->od || !op->od->nbr
1636 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1637 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
1638 *n = n_nats;
1639 return NULL;
1640 }
1641
1642 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1643 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1644 bool central_ip_address = false;
1645
1646 char **addresses;
1647 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
1648
1649 /* Get NAT IP addresses. */
1650 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
1651 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1652 ovs_be32 ip, mask;
1653
1654 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1655 if (error || mask != OVS_BE32_MAX) {
1656 free(error);
1657 continue;
1658 }
1659
1660 /* Determine whether this NAT rule satisfies the conditions for
1661 * distributed NAT processing. */
1662 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1663 && nat->logical_port && nat->external_mac) {
1664 /* Distributed NAT rule. */
1665 if (eth_addr_from_string(nat->external_mac, &mac)) {
1666 struct ds address = DS_EMPTY_INITIALIZER;
1667 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1668 ds_put_format(&address, " %s", nat->external_ip);
1669 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1670 nat->logical_port);
1671 addresses[n_nats++] = ds_steal_cstr(&address);
1672 }
1673 } else {
1674 /* Centralized NAT rule, either on gateway router or distributed
1675 * router. */
1676 ds_put_format(&c_addresses, " %s", nat->external_ip);
1677 central_ip_address = true;
1678 }
1679 }
1680
1681 /* A set to hold all load-balancer vips. */
1682 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1683 int addr_family;
1684 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
1685
1686 const char *ip_address;
1687 SSET_FOR_EACH (ip_address, &all_ips) {
1688 ds_put_format(&c_addresses, " %s", ip_address);
1689 central_ip_address = true;
1690 }
1691 sset_destroy(&all_ips);
1692
1693 if (central_ip_address) {
1694 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1695 * ports should be restricted to the "redirect-chassis". */
1696 if (op->od->l3redirect_port) {
1697 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1698 op->od->l3redirect_port->json_key);
1699 }
1700
1701 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
1702 }
1703
1704 *n = n_nats;
1705
1706 return addresses;
1707 }
1708
1709 static bool
1710 gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc,
1711 const struct sbrec_chassis *nb_gwc_c,
1712 const struct sbrec_gateway_chassis *sb_gwc)
1713 {
1714 bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
1715 && nb_gwc->priority == sb_gwc->priority
1716 && smap_equal(&nb_gwc->options, &sb_gwc->options)
1717 && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
1718
1719 if (!equal) {
1720 return false;
1721 }
1722
1723 /* If everything else matched and we were unable to find the SBDB
1724 * Chassis entry at this time, assume a match and return true.
1725 * This happens when an ovn-controller is restarting and the Chassis
1726 * entry is gone away momentarily */
1727 return !nb_gwc_c
1728 || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
1729 sb_gwc->chassis->name));
1730 }
1731
1732 static bool
1733 sbpb_gw_chassis_needs_update(
1734 const struct sbrec_port_binding *port_binding,
1735 const struct nbrec_logical_router_port *lrp,
1736 const struct chassis_index *chassis_index)
1737 {
1738 if (!lrp || !port_binding) {
1739 return false;
1740 }
1741
1742 /* These arrays are used to collect valid Gateway_Chassis and valid
1743 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1744 * we ignore the ones we can't match on the SBDB */
1745 struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis *
1746 sizeof *lrp_gwc);
1747 const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis *
1748 sizeof *lrp_gwc_c);
1749
1750 /* Count the number of gateway chassis chassis names from the logical
1751 * router port that we are able to match on the southbound database */
1752 int lrp_n_gateway_chassis = 0;
1753 int n;
1754 for (n = 0; n < lrp->n_gateway_chassis; n++) {
1755
1756 if (!lrp->gateway_chassis[n]->chassis_name) {
1757 continue;
1758 }
1759
1760 const struct sbrec_chassis *chassis =
1761 chassis_lookup_by_name(chassis_index,
1762 lrp->gateway_chassis[n]->chassis_name);
1763
1764 lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
1765 lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
1766 lrp_n_gateway_chassis++;
1767 if (!chassis) {
1768 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1769 VLOG_WARN_RL(
1770 &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1771 "on logical router port %s does not exist in SBDB",
1772 lrp->gateway_chassis[n]->chassis_name, lrp->name);
1773 }
1774 }
1775
1776 /* Basic check, different amount of Gateway_Chassis means that we
1777 * need to update southbound database Port_Binding */
1778 if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) {
1779 free(lrp_gwc_c);
1780 free(lrp_gwc);
1781 return true;
1782 }
1783
1784 for (n = 0; n < lrp_n_gateway_chassis; n++) {
1785 int i;
1786 /* For each of the valid gw chassis on the lrp, check if there's
1787 * a match on the Port_Binding list, we assume order is not
1788 * persisted */
1789 for (i = 0; i < port_binding->n_gateway_chassis; i++) {
1790 if (gateway_chassis_equal(lrp_gwc[n],
1791 lrp_gwc_c[n],
1792 port_binding->gateway_chassis[i])) {
1793 break; /* we found a match */
1794 }
1795 }
1796
1797 /* if no Port_Binding gateway chassis matched for the entry... */
1798 if (i == port_binding->n_gateway_chassis) {
1799 free(lrp_gwc_c);
1800 free(lrp_gwc);
1801 return true; /* found no match for this gateway chassis on lrp */
1802 }
1803 }
1804
1805 /* no need for update, all ports matched */
1806 free(lrp_gwc_c);
1807 free(lrp_gwc);
1808 return false;
1809 }
1810
1811 /* This functions translates the gw chassis on the nb database
1812 * to sb database entries, the only difference is that SB database
1813 * Gateway_Chassis table references the chassis directly instead
1814 * of using the name */
1815 static void
1816 copy_gw_chassis_from_nbrp_to_sbpb(
1817 struct northd_context *ctx,
1818 const struct nbrec_logical_router_port *lrp,
1819 const struct chassis_index *chassis_index,
1820 const struct sbrec_port_binding *port_binding) {
1821
1822 if (!lrp || !port_binding || !lrp->n_gateway_chassis) {
1823 return;
1824 }
1825
1826 struct sbrec_gateway_chassis **gw_chassis = NULL;
1827 int n_gwc = 0;
1828 int n;
1829
1830 /* XXX: This can be improved. This code will generate a set of new
1831 * Gateway_Chassis and push them all in a single transaction, instead
1832 * this would be more optimal if we just add/update/remove the rows in
1833 * the southbound db that need to change. We don't expect lots of
1834 * changes to the Gateway_Chassis table, but if that proves to be wrong
1835 * we should optimize this. */
1836 for (n = 0; n < lrp->n_gateway_chassis; n++) {
1837 struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n];
1838 if (!lrp_gwc->chassis_name) {
1839 continue;
1840 }
1841
1842 const struct sbrec_chassis *chassis =
1843 chassis_lookup_by_name(chassis_index, lrp_gwc->chassis_name);
1844
1845 gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis);
1846
1847 struct sbrec_gateway_chassis *pb_gwc =
1848 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
1849
1850 sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name);
1851 sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority);
1852 sbrec_gateway_chassis_set_chassis(pb_gwc, chassis);
1853 sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options);
1854 sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids);
1855
1856 gw_chassis[n_gwc++] = pb_gwc;
1857 }
1858 sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc);
1859 free(gw_chassis);
1860 }
1861
1862 static void
1863 ovn_port_update_sbrec(struct northd_context *ctx,
1864 const struct ovn_port *op,
1865 const struct chassis_index *chassis_index,
1866 struct hmap *chassis_qdisc_queues)
1867 {
1868 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
1869 if (op->nbrp) {
1870 /* If the router is for l3 gateway, it resides on a chassis
1871 * and its port type is "l3gateway". */
1872 const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");
1873 if (op->derived) {
1874 sbrec_port_binding_set_type(op->sb, "chassisredirect");
1875 } else if (chassis_name) {
1876 sbrec_port_binding_set_type(op->sb, "l3gateway");
1877 } else {
1878 sbrec_port_binding_set_type(op->sb, "patch");
1879 }
1880
1881 struct smap new;
1882 smap_init(&new);
1883 if (op->derived) {
1884 const char *redirect_chassis = smap_get(&op->nbrp->options,
1885 "redirect-chassis");
1886 if (op->nbrp->n_gateway_chassis && redirect_chassis) {
1887 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1888 VLOG_WARN_RL(
1889 &rl, "logical router port %s has both options:"
1890 "redirect-chassis and gateway_chassis populated "
1891 "redirect-chassis will be ignored in favour of "
1892 "gateway chassis", op->nbrp->name);
1893 }
1894
1895 if (op->nbrp->n_gateway_chassis) {
1896 if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp,
1897 chassis_index)) {
1898 copy_gw_chassis_from_nbrp_to_sbpb(ctx, op->nbrp,
1899 chassis_index, op->sb);
1900 }
1901
1902 } else if (redirect_chassis) {
1903 /* Handle ports that had redirect-chassis option attached
1904 * to them, and for backwards compatibility convert them
1905 * to a single Gateway_Chassis entry */
1906 const struct sbrec_chassis *chassis =
1907 chassis_lookup_by_name(chassis_index, redirect_chassis);
1908 if (chassis) {
1909 /* If we found the chassis, and the gw chassis on record
1910 * differs from what we expect go ahead and update */
1911 if (op->sb->n_gateway_chassis != 1
1912 || !op->sb->gateway_chassis[0]->chassis
1913 || strcmp(op->sb->gateway_chassis[0]->chassis->name,
1914 chassis->name)
1915 || op->sb->gateway_chassis[0]->priority != 0) {
1916 /* Construct a single Gateway_Chassis entry on the
1917 * Port_Binding attached to the redirect_chassis
1918 * name */
1919 struct sbrec_gateway_chassis *gw_chassis =
1920 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
1921
1922 char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
1923 chassis->name);
1924
1925 /* XXX: Again, here, we could just update an existing
1926 * Gateway_Chassis, instead of creating a new one
1927 * and replacing it */
1928 sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
1929 sbrec_gateway_chassis_set_priority(gw_chassis, 0);
1930 sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
1931 sbrec_gateway_chassis_set_external_ids(gw_chassis,
1932 &op->nbrp->external_ids);
1933 sbrec_port_binding_set_gateway_chassis(op->sb,
1934 &gw_chassis, 1);
1935 free(gwc_name);
1936 }
1937 } else {
1938 VLOG_WARN("chassis name '%s' from redirect from logical "
1939 " router port '%s' redirect-chassis not found",
1940 redirect_chassis, op->nbrp->name);
1941 if (op->sb->n_gateway_chassis) {
1942 sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
1943 0);
1944 }
1945 }
1946 }
1947 smap_add(&new, "distributed-port", op->nbrp->name);
1948 } else {
1949 if (op->peer) {
1950 smap_add(&new, "peer", op->peer->key);
1951 }
1952 if (chassis_name) {
1953 smap_add(&new, "l3gateway-chassis", chassis_name);
1954 }
1955 }
1956 sbrec_port_binding_set_options(op->sb, &new);
1957 smap_destroy(&new);
1958
1959 sbrec_port_binding_set_parent_port(op->sb, NULL);
1960 sbrec_port_binding_set_tag(op->sb, NULL, 0);
1961
1962 struct ds s = DS_EMPTY_INITIALIZER;
1963 ds_put_cstr(&s, op->nbrp->mac);
1964 for (int i = 0; i < op->nbrp->n_networks; ++i) {
1965 ds_put_format(&s, " %s", op->nbrp->networks[i]);
1966 }
1967 const char *addresses = ds_cstr(&s);
1968 sbrec_port_binding_set_mac(op->sb, &addresses, 1);
1969 ds_destroy(&s);
1970
1971 struct smap ids = SMAP_INITIALIZER(&ids);
1972 sbrec_port_binding_set_external_ids(op->sb, &ids);
1973 } else {
1974 if (strcmp(op->nbsp->type, "router")) {
1975 uint32_t queue_id = smap_get_int(
1976 &op->sb->options, "qdisc_queue_id", 0);
1977 bool has_qos = port_has_qos_params(&op->nbsp->options);
1978 struct smap options;
1979
1980 if (op->sb->chassis && has_qos && !queue_id) {
1981 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
1982 op->sb->chassis);
1983 } else if (!has_qos && queue_id) {
1984 free_chassis_queueid(chassis_qdisc_queues,
1985 op->sb->chassis,
1986 queue_id);
1987 queue_id = 0;
1988 }
1989
1990 smap_clone(&options, &op->nbsp->options);
1991 if (queue_id) {
1992 smap_add_format(&options,
1993 "qdisc_queue_id", "%d", queue_id);
1994 }
1995 sbrec_port_binding_set_options(op->sb, &options);
1996 smap_destroy(&options);
1997 if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
1998 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
1999 } else {
2000 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2001 VLOG_WARN_RL(
2002 &rl, "Unknown port type '%s' set on logical switch '%s'.",
2003 op->nbsp->type, op->nbsp->name);
2004 }
2005 } else {
2006 const char *chassis = NULL;
2007 if (op->peer && op->peer->od && op->peer->od->nbr) {
2008 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
2009 }
2010
2011 /* A switch port connected to a gateway router is also of
2012 * type "l3gateway". */
2013 if (chassis) {
2014 sbrec_port_binding_set_type(op->sb, "l3gateway");
2015 } else {
2016 sbrec_port_binding_set_type(op->sb, "patch");
2017 }
2018
2019 const char *router_port = smap_get(&op->nbsp->options,
2020 "router-port");
2021 if (router_port || chassis) {
2022 struct smap new;
2023 smap_init(&new);
2024 if (router_port) {
2025 smap_add(&new, "peer", router_port);
2026 }
2027 if (chassis) {
2028 smap_add(&new, "l3gateway-chassis", chassis);
2029 }
2030 sbrec_port_binding_set_options(op->sb, &new);
2031 smap_destroy(&new);
2032 }
2033
2034 const char *nat_addresses = smap_get(&op->nbsp->options,
2035 "nat-addresses");
2036 if (nat_addresses && !strcmp(nat_addresses, "router")) {
2037 if (op->peer && op->peer->od
2038 && (chassis || op->peer->od->l3redirect_port)) {
2039 size_t n_nats;
2040 char **nats = get_nat_addresses(op->peer, &n_nats);
2041 if (n_nats) {
2042 sbrec_port_binding_set_nat_addresses(op->sb,
2043 (const char **) nats, n_nats);
2044 for (size_t i = 0; i < n_nats; i++) {
2045 free(nats[i]);
2046 }
2047 free(nats);
2048 } else {
2049 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2050 }
2051 } else {
2052 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2053 }
2054 /* Only accept manual specification of ethernet address
2055 * followed by IPv4 addresses on type "l3gateway" ports. */
2056 } else if (nat_addresses && chassis) {
2057 struct lport_addresses laddrs;
2058 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
2059 static struct vlog_rate_limit rl =
2060 VLOG_RATE_LIMIT_INIT(1, 1);
2061 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
2062 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2063 } else {
2064 sbrec_port_binding_set_nat_addresses(op->sb,
2065 &nat_addresses, 1);
2066 destroy_lport_addresses(&laddrs);
2067 }
2068 } else {
2069 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2070 }
2071 }
2072 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
2073 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
2074 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
2075 op->nbsp->n_addresses);
2076
2077 struct smap ids = SMAP_INITIALIZER(&ids);
2078 smap_clone(&ids, &op->nbsp->external_ids);
2079 const char *name = smap_get(&ids, "neutron:port_name");
2080 if (name && name[0]) {
2081 smap_add(&ids, "name", name);
2082 }
2083 sbrec_port_binding_set_external_ids(op->sb, &ids);
2084 smap_destroy(&ids);
2085 }
2086 }
2087
2088 /* Remove mac_binding entries that refer to logical_ports which are
2089 * deleted. */
2090 static void
2091 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
2092 {
2093 const struct sbrec_mac_binding *b, *n;
2094 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
2095 if (!ovn_port_find(ports, b->logical_port)) {
2096 sbrec_mac_binding_delete(b);
2097 }
2098 }
2099 }
2100
2101 /* Updates the southbound Port_Binding table so that it contains the logical
2102 * switch ports specified by the northbound database.
2103 *
2104 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2105 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2106 * datapaths. */
2107 static void
2108 build_ports(struct northd_context *ctx, struct hmap *datapaths,
2109 const struct chassis_index *chassis_index, struct hmap *ports)
2110 {
2111 struct ovs_list sb_only, nb_only, both;
2112 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
2113 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
2114
2115 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
2116 &tag_alloc_table, &sb_only, &nb_only, &both);
2117
2118 struct ovn_port *op, *next;
2119 /* For logical ports that are in both databases, update the southbound
2120 * record based on northbound data. Also index the in-use tunnel_keys.
2121 * For logical ports that are in NB database, do any tag allocation
2122 * needed. */
2123 LIST_FOR_EACH_SAFE (op, next, list, &both) {
2124 if (op->nbsp) {
2125 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
2126 }
2127 ovn_port_update_sbrec(ctx, op, chassis_index, &chassis_qdisc_queues);
2128
2129 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
2130 if (op->sb->tunnel_key > op->od->port_key_hint) {
2131 op->od->port_key_hint = op->sb->tunnel_key;
2132 }
2133 }
2134
2135 /* Add southbound record for each unmatched northbound record. */
2136 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
2137 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
2138 if (!tunnel_key) {
2139 continue;
2140 }
2141
2142 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
2143 ovn_port_update_sbrec(ctx, op, chassis_index, &chassis_qdisc_queues);
2144
2145 sbrec_port_binding_set_logical_port(op->sb, op->key);
2146 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
2147 }
2148
2149 bool remove_mac_bindings = false;
2150 if (!ovs_list_is_empty(&sb_only)) {
2151 remove_mac_bindings = true;
2152 }
2153
2154 /* Delete southbound records without northbound matches. */
2155 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
2156 ovs_list_remove(&op->list);
2157 sbrec_port_binding_delete(op->sb);
2158 ovn_port_destroy(ports, op);
2159 }
2160 if (remove_mac_bindings) {
2161 cleanup_mac_bindings(ctx, ports);
2162 }
2163
2164 tag_alloc_destroy(&tag_alloc_table);
2165 destroy_chassis_queues(&chassis_qdisc_queues);
2166 }
2167 \f
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* Identity of a logical multicast group: a name plus a tunnel key in the
 * OVN_MIN_MULTICAST..OVN_MAX_MULTICAST range. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Well-known group using the highest multicast key. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Well-known group using the second-highest multicast key. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
2181
2182 static bool
2183 multicast_group_equal(const struct multicast_group *a,
2184 const struct multicast_group *b)
2185 {
2186 return !strcmp(a->name, b->name) && a->key == b->key;
2187 }
2188
/* Multicast group entry: the set of ports on 'datapath' belonging to
 * 'group', to be synced to the southbound Multicast_Group table. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Member ports: 'n_ports' entries in use out of 'allocated_ports'
     * allocated; grown on demand by ovn_multicast_add(). */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
2198
2199 static uint32_t
2200 ovn_multicast_hash(const struct ovn_datapath *datapath,
2201 const struct multicast_group *group)
2202 {
2203 return hash_pointer(datapath, group->key);
2204 }
2205
2206 static struct ovn_multicast *
2207 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
2208 const struct multicast_group *group)
2209 {
2210 struct ovn_multicast *mc;
2211
2212 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
2213 ovn_multicast_hash(datapath, group), mcgroups) {
2214 if (mc->datapath == datapath
2215 && multicast_group_equal(mc->group, group)) {
2216 return mc;
2217 }
2218 }
2219 return NULL;
2220 }
2221
2222 static void
2223 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
2224 struct ovn_port *port)
2225 {
2226 struct ovn_datapath *od = port->od;
2227 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
2228 if (!mc) {
2229 mc = xmalloc(sizeof *mc);
2230 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
2231 mc->datapath = od;
2232 mc->group = group;
2233 mc->n_ports = 0;
2234 mc->allocated_ports = 4;
2235 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
2236 }
2237 if (mc->n_ports >= mc->allocated_ports) {
2238 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
2239 sizeof *mc->ports);
2240 }
2241 mc->ports[mc->n_ports++] = port;
2242 }
2243
2244 static void
2245 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
2246 {
2247 if (mc) {
2248 hmap_remove(mcgroups, &mc->hmap_node);
2249 free(mc->ports);
2250 free(mc);
2251 }
2252 }
2253
2254 static void
2255 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
2256 const struct sbrec_multicast_group *sb)
2257 {
2258 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
2259 for (size_t i = 0; i < mc->n_ports; i++) {
2260 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2261 }
2262 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2263 free(ports);
2264 }
2265 \f
2266 /* Logical flow generation.
2267 *
2268 * This code generates the Logical_Flow table in the southbound database, as a
2269 * function of most of the northbound database.
2270 */
2271
/* A logical flow destined for the southbound Logical_Flow table. */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;
    enum ovn_stage stage;
    uint16_t priority;
    char *match;                /* Owned (malloc'ed); freed on destroy. */
    char *actions;              /* Owned (malloc'ed); freed on destroy. */
    char *stage_hint;           /* Owned; may be NULL. */
    const char *where;          /* Source locator of the code that added
                                 * this flow; not owned. */
};
2283
2284 static size_t
2285 ovn_lflow_hash(const struct ovn_lflow *lflow)
2286 {
2287 return ovn_logical_flow_hash(&lflow->od->sb->header_.uuid,
2288 ovn_stage_get_table(lflow->stage),
2289 ovn_stage_get_pipeline_name(lflow->stage),
2290 lflow->priority, lflow->match,
2291 lflow->actions);
2292 }
2293
2294 static bool
2295 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2296 {
2297 return (a->od == b->od
2298 && a->stage == b->stage
2299 && a->priority == b->priority
2300 && !strcmp(a->match, b->match)
2301 && !strcmp(a->actions, b->actions));
2302 }
2303
2304 static void
2305 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
2306 enum ovn_stage stage, uint16_t priority,
2307 char *match, char *actions, char *stage_hint,
2308 const char *where)
2309 {
2310 lflow->od = od;
2311 lflow->stage = stage;
2312 lflow->priority = priority;
2313 lflow->match = match;
2314 lflow->actions = actions;
2315 lflow->stage_hint = stage_hint;
2316 lflow->where = where;
2317 }
2318
2319 /* Adds a row with the specified contents to the Logical_Flow table. */
2320 static void
2321 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2322 enum ovn_stage stage, uint16_t priority,
2323 const char *match, const char *actions,
2324 const char *stage_hint, const char *where)
2325 {
2326 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2327
2328 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
2329 ovn_lflow_init(lflow, od, stage, priority,
2330 xstrdup(match), xstrdup(actions),
2331 nullable_xstrdup(stage_hint), where);
2332 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2333 }
2334
/* Adds a row with the specified contents to the Logical_Flow table,
 * recording STAGE_HINT and the calling source location. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

/* Same as above, but with no stage hint. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                            ACTIONS, NULL)
2344
2345 static struct ovn_lflow *
2346 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
2347 enum ovn_stage stage, uint16_t priority,
2348 const char *match, const char *actions, uint32_t hash)
2349 {
2350 struct ovn_lflow target;
2351 ovn_lflow_init(&target, od, stage, priority,
2352 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2353 NULL, NULL);
2354
2355 struct ovn_lflow *lflow;
2356 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, hash, lflows) {
2357 if (ovn_lflow_equal(lflow, &target)) {
2358 return lflow;
2359 }
2360 }
2361 return NULL;
2362 }
2363
2364 static void
2365 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2366 {
2367 if (lflow) {
2368 hmap_remove(lflows, &lflow->hmap_node);
2369 free(lflow->match);
2370 free(lflow->actions);
2371 free(lflow->stage_hint);
2372 free(lflow);
2373 }
2374 }
2375
2376 /* Appends port security constraints on L2 address field 'eth_addr_field'
2377 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2378 * elements, is the collection of port_security constraints from an
2379 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2380 static void
2381 build_port_security_l2(const char *eth_addr_field,
2382 struct lport_addresses *ps_addrs,
2383 unsigned int n_ps_addrs,
2384 struct ds *match)
2385 {
2386 if (!n_ps_addrs) {
2387 return;
2388 }
2389
2390 ds_put_format(match, " && %s == {", eth_addr_field);
2391
2392 for (size_t i = 0; i < n_ps_addrs; i++) {
2393 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
2394 }
2395 ds_chomp(match, ' ');
2396 ds_put_cstr(match, "}");
2397 }
2398
/* Appends an IPv6 neighbor-discovery port-security clause to 'match':
 * nd.sll/nd.tll must be either all-zeroes or the port's MAC 'ea', and, when
 * 'n_ipv6_addrs' > 0, nd.target must be the EUI-64 link-local address
 * derived from 'ea' or one of 'ipv6_addrs'.  The parentheses opened here
 * are balanced by the "))" / ")))" appended at the end. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No IPv6 addresses: close the parentheses opened above and stop. */
        ds_put_cstr(match, "))");
        return;
    }

    /* The EUI-64 link-local address derived from 'ea' is always an
     * acceptable nd.target. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
2429
2430 static void
2431 build_port_security_ipv6_flow(
2432 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
2433 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
2434 {
2435 char ip6_str[INET6_ADDRSTRLEN + 1];
2436
2437 ds_put_format(match, " && %s == {",
2438 pipeline == P_IN ? "ip6.src" : "ip6.dst");
2439
2440 /* Allow link-local address. */
2441 struct in6_addr lla;
2442 in6_generate_lla(ea, &lla);
2443 ipv6_string_mapped(ip6_str, &lla);
2444 ds_put_format(match, "%s, ", ip6_str);
2445
2446 /* Allow ip6.dst=ff00::/8 for multicast packets */
2447 if (pipeline == P_OUT) {
2448 ds_put_cstr(match, "ff00::/8, ");
2449 }
2450 for(int i = 0; i < n_ipv6_addrs; i++) {
2451 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2452 ds_put_format(match, "%s, ", ip6_str);
2453 }
2454 /* Replace ", " by "}". */
2455 ds_chomp(match, ' ');
2456 ds_chomp(match, ',');
2457 ds_put_cstr(match, "}");
2458 }
2459
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A port-security entry with a MAC but no IPs constrains L2 only;
         * both ARP and ND flows are then generated from the MAC alone. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            /* ARP: eth.src and arp.sha must both be the allowed MAC. */
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Replace the trailing ", " by "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            /* IPv6 ND: delegate the nd.sll/nd.tll/nd.target clause. */
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all at lower priority: drop any other ARP or ND from this
     * port. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
2539
2540 /**
2541 * Build port security constraints on IPv4 and IPv6 src and dst fields
2542 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2543 *
2544 * For each port security of the logical port, following
2545 * logical flows are added
2546 * - If the port security has IPv4 addresses,
2547 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2548 *
2549 * - If the port security has IPv6 addresses,
2550 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2551 *
2552 * - If the port security has IPv4 addresses or IPv6 addresses or both
2553 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2554 */
2555 static void
2556 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
2557 struct hmap *lflows)
2558 {
2559 char *port_direction;
2560 enum ovn_stage stage;
2561 if (pipeline == P_IN) {
2562 port_direction = "inport";
2563 stage = S_SWITCH_IN_PORT_SEC_IP;
2564 } else {
2565 port_direction = "outport";
2566 stage = S_SWITCH_OUT_PORT_SEC_IP;
2567 }
2568
2569 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2570 struct lport_addresses *ps = &op->ps_addrs[i];
2571
2572 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
2573 continue;
2574 }
2575
2576 if (ps->n_ipv4_addrs) {
2577 struct ds match = DS_EMPTY_INITIALIZER;
2578 if (pipeline == P_IN) {
2579 /* Permit use of the unspecified address for DHCP discovery */
2580 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
2581 ds_put_format(&dhcp_match, "inport == %s"
2582 " && eth.src == %s"
2583 " && ip4.src == 0.0.0.0"
2584 " && ip4.dst == 255.255.255.255"
2585 " && udp.src == 68 && udp.dst == 67",
2586 op->json_key, ps->ea_s);
2587 ovn_lflow_add(lflows, op->od, stage, 90,
2588 ds_cstr(&dhcp_match), "next;");
2589 ds_destroy(&dhcp_match);
2590 ds_put_format(&match, "inport == %s && eth.src == %s"
2591 " && ip4.src == {", op->json_key,
2592 ps->ea_s);
2593 } else {
2594 ds_put_format(&match, "outport == %s && eth.dst == %s"
2595 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2596 op->json_key, ps->ea_s);
2597 }
2598
2599 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
2600 ovs_be32 mask = ps->ipv4_addrs[j].mask;
2601 /* When the netmask is applied, if the host portion is
2602 * non-zero, the host can only use the specified
2603 * address. If zero, the host is allowed to use any
2604 * address in the subnet.
2605 */
2606 if (ps->ipv4_addrs[j].plen == 32
2607 || ps->ipv4_addrs[j].addr & ~mask) {
2608 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
2609 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
2610 /* Host is also allowed to receive packets to the
2611 * broadcast address in the specified subnet. */
2612 ds_put_format(&match, ", %s",
2613 ps->ipv4_addrs[j].bcast_s);
2614 }
2615 } else {
2616 /* host portion is zero */
2617 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
2618 ps->ipv4_addrs[j].plen);
2619 }
2620 ds_put_cstr(&match, ", ");
2621 }
2622
2623 /* Replace ", " by "}". */
2624 ds_chomp(&match, ' ');
2625 ds_chomp(&match, ',');
2626 ds_put_cstr(&match, "}");
2627 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
2628 ds_destroy(&match);
2629 }
2630
2631 if (ps->n_ipv6_addrs) {
2632 struct ds match = DS_EMPTY_INITIALIZER;
2633 if (pipeline == P_IN) {
2634 /* Permit use of unspecified address for duplicate address
2635 * detection */
2636 struct ds dad_match = DS_EMPTY_INITIALIZER;
2637 ds_put_format(&dad_match, "inport == %s"
2638 " && eth.src == %s"
2639 " && ip6.src == ::"
2640 " && ip6.dst == ff02::/16"
2641 " && icmp6.type == {131, 135, 143}", op->json_key,
2642 ps->ea_s);
2643 ovn_lflow_add(lflows, op->od, stage, 90,
2644 ds_cstr(&dad_match), "next;");
2645 ds_destroy(&dad_match);
2646 }
2647 ds_put_format(&match, "%s == %s && %s == %s",
2648 port_direction, op->json_key,
2649 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
2650 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
2651 ps->ipv6_addrs, ps->n_ipv6_addrs);
2652 ovn_lflow_add(lflows, op->od, stage, 90,
2653 ds_cstr(&match), "next;");
2654 ds_destroy(&match);
2655 }
2656
2657 char *match = xasprintf("%s == %s && %s == %s && ip",
2658 port_direction, op->json_key,
2659 pipeline == P_IN ? "eth.src" : "eth.dst",
2660 ps->ea_s);
2661 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2662 free(match);
2663 }
2664
2665 }
2666
2667 static bool
2668 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2669 {
2670 return !lsp->enabled || *lsp->enabled;
2671 }
2672
2673 static bool
2674 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2675 {
2676 return !lsp->up || *lsp->up;
2677 }
2678
/* Builds, for logical switch port 'op' being offered IPv4 address
 * 'offer_ip', the three strings used by native DHCPv4 flows:
 *   - 'options_action': put_dhcp_opts(...) action carrying the configured
 *     DHCP options plus the derived netmask;
 *   - 'response_action': header rewrite that turns the request into a
 *     reply hairpinned back out the ingress port;
 *   - 'ipv4_addr_match': match on the offered source IP and the server or
 *     broadcast destination.
 * Returns true on success, false if the port has no usable DHCPv4
 * configuration (appends nothing in that case). */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action,
                    struct ds *ipv4_addr_match)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
       /* Either
        *  - cidr defined is invalid or
        *  - the offer ip of the logical port doesn't belong to the cidr
        *    defined in the DHCPv4 options.
        *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");

    if (!(server_ip && server_mac && lease_time)) {
        /* "server_id", "server_mac" and "lease_time" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    /* Work on a copy so the northbound row's options are not modified. */
    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));

    /* We're not using SMAP_FOR_EACH because we want a consistent order of the
     * options on different architectures (big or little endian, SSE4.2) */
    const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
    for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
        const struct smap_node *node = sorted_opts[i];
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }
    free(sorted_opts);

    /* Replace the trailing ", " by the closing "); next;". */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    ds_put_format(ipv4_addr_match,
                  "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
                  IP_ARGS(offer_ip), server_ip);
    smap_destroy(&dhcpv4_options);
    return true;
}
2756
2757 static bool
2758 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2759 struct ds *options_action, struct ds *response_action)
2760 {
2761 if (!op->nbsp->dhcpv6_options) {
2762 /* CMS has disabled native DHCPv6 for this lport. */
2763 return false;
2764 }
2765
2766 struct in6_addr host_ip, mask;
2767
2768 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2769 &mask);
2770 if (error) {
2771 free(error);
2772 return false;
2773 }
2774 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2775 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2776 if (!ipv6_mask_is_any(&ip6_mask)) {
2777 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2778 * options.*/
2779 return false;
2780 }
2781
2782 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
2783 /* "server_id" should be the MAC address. */
2784 const char *server_mac = smap_get(options_map, "server_id");
2785 struct eth_addr ea;
2786 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2787 /* "server_id" should be present in the dhcpv6_options. */
2788 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2789 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2790 " for lport %s", op->json_key);
2791 return false;
2792 }
2793
2794 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2795 struct in6_addr lla;
2796 in6_generate_lla(ea, &lla);
2797
2798 char server_ip[INET6_ADDRSTRLEN + 1];
2799 ipv6_string_mapped(server_ip, &lla);
2800
2801 char ia_addr[INET6_ADDRSTRLEN + 1];
2802 ipv6_string_mapped(ia_addr, offer_ip);
2803
2804 ds_put_format(options_action,
2805 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2806
2807 /* Check whether the dhcpv6 options should be configured as stateful.
2808 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2809 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
2810 ipv6_string_mapped(ia_addr, offer_ip);
2811 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2812 }
2813
2814 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2815 * options on different architectures (big or little endian, SSE4.2) */
2816 const struct smap_node **sorted_opts = smap_sort(options_map);
2817 for (size_t i = 0; i < smap_count(options_map); i++) {
2818 const struct smap_node *node = sorted_opts[i];
2819 if (strcmp(node->key, "dhcpv6_stateless")) {
2820 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2821 }
2822 }
2823 free(sorted_opts);
2824
2825 ds_chomp(options_action, ' ');
2826 ds_chomp(options_action, ',');
2827 ds_put_cstr(options_action, "); next;");
2828
2829 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2830 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2831 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2832 "output;",
2833 server_mac, server_ip);
2834
2835 return true;
2836 }
2837
2838 static bool
2839 has_stateful_acl(struct ovn_datapath *od)
2840 {
2841 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2842 struct nbrec_acl *acl = od->nbs->acls[i];
2843 if (!strcmp(acl->action, "allow-related")) {
2844 return true;
2845 }
2846 }
2847
2848 return false;
2849 }
2850
/* Builds the ingress and egress Pre-ACL stage flows for logical switch
 * 'od': a priority-0 "next;" default, and, when the switch has stateful
 * ("allow-related") ACLs, priority-110 exemptions for router/localnet
 * ports and ND/ICMP-error traffic plus priority-100 flows that flag IP
 * traffic for conntrack defragmentation. */
static void
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    if (has_stateful) {
        for (size_t i = 0; i < od->n_router_ports; i++) {
            struct ovn_port *op = od->router_ports[i];
            /* Can't use ct() for router ports. Consider the
             * following configuration: lp1(10.0.0.2) on
             * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
             * ping from lp1 to lp2, First, the response will go
             * through ct() with a zone for lp2 in the ls2 ingress
             * pipeline on hostB. That ct zone knows about this
             * connection. Next, it goes through ct() with the zone
             * for the router port in the egress pipeline of ls2 on
             * hostB. This zone does not know about the connection,
             * as the icmp request went through the logical router
             * on hostA, not hostB. This would only work with
             * distributed conntrack state across all chassis. */
            struct ds match_in = DS_EMPTY_INITIALIZER;
            struct ds match_out = DS_EMPTY_INITIALIZER;

            ds_put_format(&match_in, "ip && inport == %s", op->json_key);
            ds_put_format(&match_out, "ip && outport == %s", op->json_key);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                          ds_cstr(&match_in), "next;");
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                          ds_cstr(&match_out), "next;");

            ds_destroy(&match_in);
            ds_destroy(&match_out);
        }
        /* The localnet port, if any, is likewise exempted from ct(). */
        if (od->localnet_port) {
            struct ds match_in = DS_EMPTY_INITIALIZER;
            struct ds match_out = DS_EMPTY_INITIALIZER;

            ds_put_format(&match_in, "ip && inport == %s",
                          od->localnet_port->json_key);
            ds_put_format(&match_out, "ip && outport == %s",
                          od->localnet_port->json_key);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                          ds_cstr(&match_in), "next;");
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                          ds_cstr(&match_out), "next;");

            ds_destroy(&match_in);
            ds_destroy(&match_out);
        }

        /* Ingress and Egress Pre-ACL Table (Priority 110).
         *
         * Not to do conntrack on ND and ICMP destination
         * unreachable packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                      "nd || nd_rs || nd_ra || icmp4.type == 3 || "
                      "icmp6.type == 1 || (tcp && tcp.flags == 4)",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                      "nd || nd_rs || nd_ra || icmp4.type == 3 || "
                      "icmp6.type == 1 || (tcp && tcp.flags == 4)",
                      "next;");

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed.
         *
         * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * it to conntrack for tracking and defragmentation. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
    }
}
2936
2937 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2938 * 'ip_address'. The caller must free() the memory allocated for
2939 * 'ip_address'. */
2940 static void
2941 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
2942 uint16_t *port, int *addr_family)
2943 {
2944 struct sockaddr_storage ss;
2945 char ip_addr_buf[INET6_ADDRSTRLEN];
2946 char *error;
2947
2948 error = ipv46_parse(key, PORT_OPTIONAL, &ss);
2949 if (error) {
2950 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2951 VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
2952 key);
2953 free(error);
2954 return;
2955 }
2956
2957 if (ss.ss_family == AF_INET) {
2958 struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *, &ss);
2959 *port = sin->sin_port == 0 ? 0 : ntohs(sin->sin_port);
2960 inet_ntop(AF_INET, &sin->sin_addr, ip_addr_buf, sizeof ip_addr_buf);
2961 } else {
2962 struct sockaddr_in6 *sin6 = ALIGNED_CAST(struct sockaddr_in6 *, &ss);
2963 *port = sin6->sin6_port == 0 ? 0 : ntohs(sin6->sin6_port);
2964 inet_ntop(AF_INET6, &sin6->sin6_addr, ip_addr_buf, sizeof ip_addr_buf);
2965 }
2966
2967 *ip_address = xstrdup(ip_addr_buf);
2968 *addr_family = ss.ss_family;
2969 }
2970
2971 /*
2972 * Returns true if logical switch is configured with DNS records, false
2973 * otherwise.
2974 */
2975 static bool
2976 ls_has_dns_records(const struct nbrec_logical_switch *nbs)
2977 {
2978 for (size_t i = 0; i < nbs->n_dns_records; i++) {
2979 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
2980 return true;
2981 }
2982 }
2983
2984 return false;
2985 }
2986
2987 static void
2988 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2989 {
2990 /* Allow all packets to go to next tables by default. */
2991 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2992 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2993
2994 struct sset all_ips = SSET_INITIALIZER(&all_ips);
2995 bool vip_configured = false;
2996 int addr_family = AF_INET;
2997 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2998 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
2999 struct smap *vips = &lb->vips;
3000 struct smap_node *node;
3001
3002 SMAP_FOR_EACH (node, vips) {
3003 vip_configured = true;
3004
3005 /* node->key contains IP:port or just IP. */
3006 char *ip_address = NULL;
3007 uint16_t port;
3008 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3009 &addr_family);
3010 if (!ip_address) {
3011 continue;
3012 }
3013
3014 if (!sset_contains(&all_ips, ip_address)) {
3015 sset_add(&all_ips, ip_address);
3016 }
3017
3018 free(ip_address);
3019
3020 /* Ignore L4 port information in the key because fragmented packets
3021 * may not have L4 information. The pre-stateful table will send
3022 * the packet through ct() action to de-fragment. In stateful
3023 * table, we will eventually look at L4 information. */
3024 }
3025 }
3026
3027 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3028 * packet to conntrack for defragmentation. */
3029 const char *ip_address;
3030 SSET_FOR_EACH(ip_address, &all_ips) {
3031 char *match;
3032
3033 if (addr_family == AF_INET) {
3034 match = xasprintf("ip && ip4.dst == %s", ip_address);
3035 } else {
3036 match = xasprintf("ip && ip6.dst == %s", ip_address);
3037 }
3038 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
3039 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3040 free(match);
3041 }
3042
3043 sset_destroy(&all_ips);
3044
3045 if (vip_configured) {
3046 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
3047 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3048 }
3049 }
3050
3051 static void
3052 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
3053 {
3054 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3055 * allowed by default. */
3056 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
3057 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
3058
3059 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3060 * sent to conntrack for tracking and defragmentation. */
3061 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
3062 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3063 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
3064 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3065 }
3066
3067 static void
3068 build_acl_log(struct ds *actions, const struct nbrec_acl *acl)
3069 {
3070 if (!acl->log) {
3071 return;
3072 }
3073
3074 ds_put_cstr(actions, "log(");
3075
3076 if (acl->name) {
3077 ds_put_format(actions, "name=\"%s\", ", acl->name);
3078 }
3079
3080 /* If a severity level isn't specified, default to "info". */
3081 if (acl->severity) {
3082 ds_put_format(actions, "severity=%s, ", acl->severity);
3083 } else {
3084 ds_put_format(actions, "severity=info, ");
3085 }
3086
3087 if (!strcmp(acl->action, "drop")) {
3088 ds_put_cstr(actions, "verdict=drop, ");
3089 } else if (!strcmp(acl->action, "reject")) {
3090 ds_put_cstr(actions, "verdict=reject, ");
3091 } else if (!strcmp(acl->action, "allow")
3092 || !strcmp(acl->action, "allow-related")) {
3093 ds_put_cstr(actions, "verdict=allow, ");
3094 }
3095
3096 ds_chomp(actions, ' ');
3097 ds_chomp(actions, ',');
3098 ds_put_cstr(actions, "); ");
3099 }
3100
/* Emits the logical flows that implement a "reject" ACL in 'stage':
 * matching TCP traffic is answered with a TCP RST, and other IP traffic
 * with an ICMPv4/ICMPv6 message.  'extra_match', if non-empty, is ANDed
 * in front of the ACL's own match; 'extra_actions', if non-empty, is
 * prepended to the generated actions (the caller uses this for
 * ct_commit on established connections).
 *
 * NOTE(review): 'extra_actions' is applied only to the ICMP flows below,
 * not to the tcp_reset flows -- confirm this asymmetry is intentional. */
static void
build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
                       enum ovn_stage stage, struct nbrec_acl *acl,
                       struct ds *extra_match, struct ds *extra_actions)
{
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;
    bool ingress = (stage == S_SWITCH_IN_ACL);

    /* TCP */
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip4 && tcp && (%s)", acl->match);
    /* Swap L2/L3 addresses and send the RST back out the port the packet
     * arrived on; in the egress pipeline the reply must instead re-enter
     * the ingress pipeline at table 0. */
    ds_put_format(&actions, "reg0 = 0; "
                  "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
                  "tcp_reset { outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    /* +10 over the ICMP flows so TCP hits the reset flow, not ICMP. */
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_clear(&match);
    ds_clear(&actions);
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip6 && tcp && (%s)", acl->match);
    ds_put_format(&actions, "reg0 = 0; "
                  "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
                  "tcp_reset { outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
                  ds_cstr(&match), ds_cstr(&actions));

    /* IP traffic */
    ds_clear(&match);
    ds_clear(&actions);
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip4 && (%s)", acl->match);
    if (extra_actions->length > 0) {
        ds_put_format(&actions, "%s ", extra_actions->string);
    }
    ds_put_format(&actions, "reg0 = 0; "
                  "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
                  "icmp4 { outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_clear(&match);
    ds_clear(&actions);
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip6 && (%s)", acl->match);
    if (extra_actions->length > 0) {
        ds_put_format(&actions, "%s ", extra_actions->string);
    }
    ds_put_format(&actions, "reg0 = 0; icmp6 { "
                  "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
                  "outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
                  ds_cstr(&match), ds_cstr(&actions));

    ds_destroy(&match);
    ds_destroy(&actions);
}
3173
/* Builds the logical flows for the ingress and egress ACL stages of
 * logical switch 'od': the conntrack "plumbing" flows required when any
 * stateful ACL exists on the datapath, one or more flows per configured
 * ACL, and high-priority flows that let DHCP and DNS replies from
 * ovn-controller through. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiater's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;"
         *
         * We also check for established connections that have ct_label.blocked
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label.blocked=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        /* The stage hint (the first 32 bits of the ACL row's UUID) lets
         * the generated logical flow be traced back to this ACL. */
        char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
        if (!strcmp(acl->action, "allow")
            || !strcmp(acl->action, "allow-related")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiater's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            if (!has_stateful) {
                struct ds actions = DS_EMPTY_INITIALIZER;
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "next;");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        acl->match, ds_cstr(&actions),
                                        stage_hint);
                ds_destroy(&actions);
            } else {
                struct ds match = DS_EMPTY_INITIALIZER;
                struct ds actions = DS_EMPTY_INITIALIZER;

                /* Commit the connection tracking entry if it's a new
                 * connection that matches this ACL.  After this commit,
                 * the reply traffic is allowed by a flow we create at
                 * priority 65535, defined earlier.
                 *
                 * It's also possible that a known connection was marked for
                 * deletion after a policy was deleted, but the policy was
                 * re-added while that connection is still known.  We catch
                 * that case here and un-set ct_label.blocked (which will be done
                 * by ct_commit in the "stateful" stage) to indicate that the
                 * connection should be allowed to resume.
                 */
                ds_put_format(&match, "((ct.new && !ct.est)"
                              " || (!ct.new && ct.est && !ct.rpl "
                              "&& ct_label.blocked == 1)) "
                              "&& (%s)", acl->match);
                ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "next;");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        ds_cstr(&match),
                                        ds_cstr(&actions),
                                        stage_hint);

                /* Match on traffic in the request direction for an established
                 * connection tracking entry that has not been marked for
                 * deletion.  There is no need to commit here, so we can just
                 * proceed to the next table.  We use this to ensure that this
                 * connection is still allowed by the currently defined
                 * policy. */
                ds_clear(&match);
                ds_clear(&actions);
                ds_put_format(&match,
                              "!ct.new && ct.est && !ct.rpl"
                              " && ct_label.blocked == 0 && (%s)",
                              acl->match);

                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "next;");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        ds_cstr(&match), ds_cstr(&actions),
                                        stage_hint);

                ds_destroy(&match);
                ds_destroy(&actions);
            }
        } else if (!strcmp(acl->action, "drop")
                   || !strcmp(acl->action, "reject")) {
            struct ds match = DS_EMPTY_INITIALIZER;
            struct ds actions = DS_EMPTY_INITIALIZER;

            /* The implementation of "drop" differs if stateful ACLs are in
             * use for this datapath.  In that case, the actions differ
             * depending on whether the connection was previously committed
             * to the connection tracker with ct_commit. */
            if (has_stateful) {
                /* If the packet is not part of an established connection, then
                 * we can simply reject/drop it. */
                ds_put_cstr(&match,
                            "(!ct.est || (ct.est && ct_label.blocked == 1))");
                if (!strcmp(acl->action, "reject")) {
                    build_reject_acl_rules(od, lflows, stage, acl, &match,
                                           &actions);
                } else {
                    ds_put_format(&match, " && (%s)", acl->match);
                    build_acl_log(&actions, acl);
                    ds_put_cstr(&actions, "/* drop */");
                    ovn_lflow_add(lflows, od, stage,
                                  acl->priority + OVN_ACL_PRI_OFFSET,
                                  ds_cstr(&match), ds_cstr(&actions));
                }
                /* For an existing connection without ct_label set, we've
                 * encountered a policy change.  ACLs previously allowed
                 * this connection and we committed the connection tracking
                 * entry.  Current policy says that we should drop this
                 * connection.  First, we set bit 0 of ct_label to indicate
                 * that this connection is set for deletion.  By not
                 * specifying "next;", we implicitly drop the packet after
                 * updating conntrack state.  We would normally defer
                 * ct_commit() to the "stateful" stage, but since we're
                 * rejecting/dropping the packet, we go ahead and do it here.
                 */
                ds_clear(&match);
                ds_clear(&actions);
                ds_put_cstr(&match, "ct.est && ct_label.blocked == 0");
                ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
                if (!strcmp(acl->action, "reject")) {
                    build_reject_acl_rules(od, lflows, stage, acl, &match,
                                           &actions);
                } else {
                    ds_put_format(&match, " && (%s)", acl->match);
                    build_acl_log(&actions, acl);
                    ds_put_cstr(&actions, "/* drop */");
                    ovn_lflow_add(lflows, od, stage,
                                  acl->priority + OVN_ACL_PRI_OFFSET,
                                  ds_cstr(&match), ds_cstr(&actions));
                }
            } else {
                /* There are no stateful ACLs in use on this datapath,
                 * so a "reject/drop" ACL is simply the "reject/drop"
                 * logical flow action in all cases. */
                if (!strcmp(acl->action, "reject")) {
                    build_reject_acl_rules(od, lflows, stage, acl, &match,
                                           &actions);
                } else {
                    build_acl_log(&actions, acl);
                    ds_put_cstr(&actions, "/* drop */");
                    ovn_lflow_add(lflows, od, stage,
                                  acl->priority + OVN_ACL_PRI_OFFSET,
                                  acl->match, ds_cstr(&actions));
                }
            }
            ds_destroy(&match);
            ds_destroy(&actions);
        }
        free(stage_hint);
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4 options.
     * */
    for (size_t i = 0; i < od->nbs->n_ports; i++) {
        if (od->nbs->ports[i]->dhcpv4_options) {
            const char *server_id = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
            const char *lease_time = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
            if (server_id && server_mac && lease_time) {
                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions =
                    has_stateful ? "ct_commit; next;" : "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip4.src == %s && udp && udp.src == 67 "
                              "&& udp.dst == 68", od->nbs->ports[i]->name,
                              server_mac, server_id);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }

        if (od->nbs->ports[i]->dhcpv6_options) {
            /* For DHCPv6, the "server_id" option holds the server's MAC
             * address; the link-local IP is derived from it below. */
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
            struct eth_addr ea;
            if (server_mac && eth_addr_from_string(server_mac, &ea)) {
                /* Get the link local IP of the DHCPv6 server from the
                 * server MAC. */
                struct in6_addr lla;
                in6_generate_lla(ea, &lla);

                char server_ip[INET6_ADDRSTRLEN + 1];
                ipv6_string_mapped(server_ip, &lla);

                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions = has_stateful ? "ct_commit; next;" :
                    "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip6.src == %s && udp && udp.src == 547 "
                              "&& udp.dst == 546", od->nbs->ports[i]->name,
                              server_mac, server_ip);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }
    }

    /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
     * if the CMS has configured DNS records for the datapath.
     */
    if (ls_has_dns_records(od->nbs)) {
        const char *actions = has_stateful ? "ct_commit; next;" : "next;";
        ovn_lflow_add(
            lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
            actions);
    }
}
3484
3485 static void
3486 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3487 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3488 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3489 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_METER, 0, "1", "next;");
3490 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_METER, 0, "1", "next;");
3491
3492 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3493 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3494 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3495 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3496 int64_t rate = 0;
3497 int64_t burst = 0;
3498
3499 for (size_t j = 0; j < qos->n_action; j++) {
3500 if (!strcmp(qos->key_action[j], "dscp")) {
3501 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3502
3503 ds_put_format(&dscp_action, "ip.dscp = %"PRId64"; next;",
3504 qos->value_action[j]);
3505 ovn_lflow_add(lflows, od, stage,
3506 qos->priority,
3507 qos->match, ds_cstr(&dscp_action));
3508 ds_destroy(&dscp_action);
3509 }
3510 }
3511
3512 for (size_t n = 0; n < qos->n_bandwidth; n++) {
3513 if (!strcmp(qos->key_bandwidth[n], "rate")) {
3514 rate = qos->value_bandwidth[n];
3515 } else if (!strcmp(qos->key_bandwidth[n], "burst")) {
3516 burst = qos->value_bandwidth[n];
3517 }
3518 }
3519 if (rate) {
3520 struct ds meter_action = DS_EMPTY_INITIALIZER;
3521 stage = ingress ? S_SWITCH_IN_QOS_METER : S_SWITCH_OUT_QOS_METER;
3522 if (burst) {
3523 ds_put_format(&meter_action,
3524 "set_meter(%"PRId64", %"PRId64"); next;",
3525 rate, burst);
3526 } else {
3527 ds_put_format(&meter_action,
3528 "set_meter(%"PRId64"); next;",
3529 rate);
3530 }
3531
3532 /* Ingress and Egress QoS Meter Table.
3533 *
3534 * We limit the bandwidth of this flow by adding a meter table.
3535 */
3536 ovn_lflow_add(lflows, od, stage,
3537 qos->priority,
3538 qos->match, ds_cstr(&meter_action));
3539 ds_destroy(&meter_action);
3540 }
3541 }
3542 }
3543
3544 static void
3545 build_lb(struct ovn_datapath *od, struct hmap *lflows)
3546 {
3547 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3548 * default. */
3549 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3550 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3551
3552 if (od->nbs->load_balancer) {
3553 /* Ingress and Egress LB Table (Priority 65535).
3554 *
3555 * Send established traffic through conntrack for just NAT. */
3556 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3557 "ct.est && !ct.rel && !ct.new && !ct.inv",
3558 REGBIT_CONNTRACK_NAT" = 1; next;");
3559 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3560 "ct.est && !ct.rel && !ct.new && !ct.inv",
3561 REGBIT_CONNTRACK_NAT" = 1; next;");
3562 }
3563 }
3564
3565 static void
3566 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3567 {
3568 /* Ingress and Egress stateful Table (Priority 0): Packets are
3569 * allowed by default. */
3570 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3571 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3572
3573 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3574 * committed to conntrack. We always set ct_label.blocked to 0 here as
3575 * any packet that makes it this far is part of a connection we
3576 * want to allow to continue. */
3577 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3578 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3579 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3580 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3581
3582 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3583 * through nat (without committing).
3584 *
3585 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3586 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3587 * don't overlap.
3588 */
3589 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3590 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3591 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3592 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3593
3594 /* Load balancing rules for new connections get committed to conntrack
3595 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3596 * a higher priority rule for load balancing below also commits the
3597 * connection, so it is okay if we do not hit the above match on
3598 * REGBIT_CONNTRACK_COMMIT. */
3599 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3600 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3601 struct smap *vips = &lb->vips;
3602 struct smap_node *node;
3603
3604 SMAP_FOR_EACH (node, vips) {
3605 uint16_t port = 0;
3606 int addr_family;
3607
3608 /* node->key contains IP:port or just IP. */
3609 char *ip_address = NULL;
3610 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3611 &addr_family);
3612 if (!ip_address) {
3613 continue;
3614 }
3615
3616 /* New connections in Ingress table. */
3617 char *action = xasprintf("ct_lb(%s);", node->value);
3618 struct ds match = DS_EMPTY_INITIALIZER;
3619 if (addr_family == AF_INET) {
3620 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
3621 } else {
3622 ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
3623 }
3624 if (port) {
3625 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
3626 ds_put_format(&match, " && udp.dst == %d", port);
3627 } else {
3628 ds_put_format(&match, " && tcp.dst == %d", port);
3629 }
3630 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3631 120, ds_cstr(&match), action);
3632 } else {
3633 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3634 110, ds_cstr(&match), action);
3635 }
3636
3637 free(ip_address);
3638 ds_destroy(&match);
3639 free(action);
3640 }
3641 }
3642 }
3643
3644 static void
3645 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
3646 struct hmap *lflows, struct hmap *mcgroups)
3647 {
3648 /* This flow table structure is documented in ovn-northd(8), so please
3649 * update ovn-northd.8.xml if you change anything. */
3650
3651 struct ds match = DS_EMPTY_INITIALIZER;
3652 struct ds actions = DS_EMPTY_INITIALIZER;
3653
3654 /* Build pre-ACL and ACL tables for both ingress and egress.
3655 * Ingress tables 3 through 10. Egress tables 0 through 7. */
3656 struct ovn_datapath *od;
3657 HMAP_FOR_EACH (od, key_node, datapaths) {
3658 if (!od->nbs) {
3659 continue;
3660 }
3661
3662 build_pre_acls(od, lflows);
3663 build_pre_lb(od, lflows);
3664 build_pre_stateful(od, lflows);
3665 build_acls(od, lflows);
3666 build_qos(od, lflows);
3667 build_lb(od, lflows);
3668 build_stateful(od, lflows);
3669 }
3670
3671 /* Logical switch ingress table 0: Admission control framework (priority
3672 * 100). */
3673 HMAP_FOR_EACH (od, key_node, datapaths) {
3674 if (!od->nbs) {
3675 continue;
3676 }
3677
3678 /* Logical VLANs not supported. */
3679 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
3680 "drop;");
3681
3682 /* Broadcast/multicast source address is invalid. */
3683 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
3684 "drop;");
3685
3686 /* Port security flows have priority 50 (see below) and will continue
3687 * to the next table if packet source is acceptable. */
3688 }
3689
3690 /* Logical switch ingress table 0: Ingress port security - L2
3691 * (priority 50).
3692 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
3693 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
3694 */
3695 struct ovn_port *op;
3696 HMAP_FOR_EACH (op, key_node, ports) {
3697 if (!op->nbsp) {
3698 continue;
3699 }
3700
3701 if (!lsp_is_enabled(op->nbsp)) {
3702 /* Drop packets from disabled logical ports (since logical flow
3703 * tables are default-drop). */
3704 continue;
3705 }
3706
3707 ds_clear(&match);
3708 ds_clear(&actions);
3709 ds_put_format(&match, "inport == %s", op->json_key);
3710 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
3711 &match);
3712
3713 const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
3714 if (queue_id) {
3715 ds_put_format(&actions, "set_queue(%s); ", queue_id);
3716 }
3717 ds_put_cstr(&actions, "next;");
3718 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
3719 ds_cstr(&match), ds_cstr(&actions));
3720
3721 if (op->nbsp->n_port_security) {
3722 build_port_security_ip(P_IN, op, lflows);
3723 build_port_security_nd(op, lflows);
3724 }
3725 }
3726
3727 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
3728 * (priority 0)*/
3729 HMAP_FOR_EACH (od, key_node, datapaths) {
3730 if (!od->nbs) {
3731 continue;
3732 }
3733
3734 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
3735 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
3736 }
3737
3738 /* Ingress table 11: ARP/ND responder, skip requests coming from localnet
3739 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
3740 * rationale. */
3741 HMAP_FOR_EACH (op, key_node, ports) {
3742 if (!op->nbsp) {
3743 continue;
3744 }
3745
3746 if ((!strcmp(op->nbsp->type, "localnet")) ||
3747 (!strcmp(op->nbsp->type, "vtep"))) {
3748 ds_clear(&match);
3749 ds_put_format(&match, "inport == %s", op->json_key);
3750 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3751 ds_cstr(&match), "next;");
3752 }
3753 }
3754
3755 /* Ingress table 11: ARP/ND responder, reply for known IPs.
3756 * (priority 50). */
3757 HMAP_FOR_EACH (op, key_node, ports) {
3758 if (!op->nbsp) {
3759 continue;
3760 }
3761
3762 /*
3763 * Add ARP/ND reply flows if either the
3764 * - port is up or
3765 * - port type is router or
3766 * - port type is localport
3767 */
3768 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
3769 strcmp(op->nbsp->type, "localport")) {
3770 continue;
3771 }
3772
3773 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
3774 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
3775 ds_clear(&match);
3776 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
3777 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
3778 ds_clear(&actions);
3779 ds_put_format(&actions,
3780 "eth.dst = eth.src; "
3781 "eth.src = %s; "
3782 "arp.op = 2; /* ARP reply */ "
3783 "arp.tha = arp.sha; "
3784 "arp.sha = %s; "
3785 "arp.tpa = arp.spa; "
3786 "arp.spa = %s; "
3787 "outport = inport; "
3788 "flags.loopback = 1; "
3789 "output;",
3790 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
3791 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
3792 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
3793 ds_cstr(&match), ds_cstr(&actions));
3794
3795 /* Do not reply to an ARP request from the port that owns the
3796 * address (otherwise a DHCP client that ARPs to check for a
3797 * duplicate address will fail). Instead, forward it the usual
3798 * way.
3799 *
3800 * (Another alternative would be to simply drop the packet. If
3801 * everything is working as it is configured, then this would
3802 * produce equivalent results, since no one should reply to the
3803 * request. But ARPing for one's own IP address is intended to
3804 * detect situations where the network is not working as
3805 * configured, so dropping the request would frustrate that
3806 * intent.) */
3807 ds_put_format(&match, " && inport == %s", op->json_key);
3808 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3809 ds_cstr(&match), "next;");
3810 }
3811
3812 /* For ND solicitations, we need to listen for both the
3813 * unicast IPv6 address and its all-nodes multicast address,
3814 * but always respond with the unicast IPv6 address. */
3815 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3816 ds_clear(&match);
3817 ds_put_format(&match,
3818 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
3819 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3820 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
3821 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
3822
3823 ds_clear(&actions);
3824 ds_put_format(&actions,
3825 "nd_na { "
3826 "eth.src = %s; "
3827 "ip6.src = %s; "
3828 "nd.target = %s; "
3829 "nd.tll = %s; "
3830 "outport = inport; "
3831 "flags.loopback = 1; "
3832 "output; "
3833 "};",
3834 op->lsp_addrs[i].ea_s,
3835 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3836 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3837 op->lsp_addrs[i].ea_s);
3838 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
3839 ds_cstr(&match), ds_cstr(&actions));
3840
3841 /* Do not reply to a solicitation from the port that owns the
3842 * address (otherwise DAD detection will fail). */
3843 ds_put_format(&match, " && inport == %s", op->json_key);
3844 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3845 ds_cstr(&match), "next;");
3846 }
3847 }
3848 }
3849
3850 /* Ingress table 11: ARP/ND responder, by default goto next.
3851 * (priority 0)*/
3852 HMAP_FOR_EACH (od, key_node, datapaths) {
3853 if (!od->nbs) {
3854 continue;
3855 }
3856
3857 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
3858 }
3859
3860 /* Logical switch ingress table 12 and 13: DHCP options and response
3861 * priority 100 flows. */
3862 HMAP_FOR_EACH (op, key_node, ports) {
3863 if (!op->nbsp) {
3864 continue;
3865 }
3866
3867 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
3868 /* Don't add the DHCP flows if the port is not enabled or if the
3869 * port is a router port. */
3870 continue;
3871 }
3872
3873 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
3874 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
3875 */
3876 continue;
3877 }
3878
3879 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
3880 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
3881 struct ds options_action = DS_EMPTY_INITIALIZER;
3882 struct ds response_action = DS_EMPTY_INITIALIZER;
3883 struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
3884 if (build_dhcpv4_action(
3885 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
3886 &options_action, &response_action, &ipv4_addr_match)) {
3887 ds_clear(&match);
3888 ds_put_format(
3889 &match, "inport == %s && eth.src == %s && "
3890 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
3891 "udp.src == 68 && udp.dst == 67", op->json_key,
3892 op->lsp_addrs[i].ea_s);
3893
3894 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3895 100, ds_cstr(&match),
3896 ds_cstr(&options_action));
3897 ds_clear(&match);
3898 /* Allow ip4.src = OFFER_IP and
3899 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
3900 * cases
3901 * - When the client wants to renew the IP by sending
3902 * the DHCPREQUEST to the server ip.
3903 * - When the client wants to renew the IP by
3904 * broadcasting the DHCPREQUEST.
3905 */
3906 ds_put_format(
3907 &match, "inport == %s && eth.src == %s && "
3908 "%s && udp.src == 68 && udp.dst == 67", op->json_key,
3909 op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));
3910
3911 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3912 100, ds_cstr(&match),
3913 ds_cstr(&options_action));
3914 ds_clear(&match);
3915
3916 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
3917 * put_dhcp_opts action is successful. */
3918 ds_put_format(
3919 &match, "inport == %s && eth.src == %s && "
3920 "ip4 && udp.src == 68 && udp.dst == 67"
3921 " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
3922 op->lsp_addrs[i].ea_s);
3923 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
3924 100, ds_cstr(&match),
3925 ds_cstr(&response_action));
3926 ds_destroy(&options_action);
3927 ds_destroy(&response_action);
3928 ds_destroy(&ipv4_addr_match);
3929 break;
3930 }
3931 }
3932
3933 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3934 struct ds options_action = DS_EMPTY_INITIALIZER;
3935 struct ds response_action = DS_EMPTY_INITIALIZER;
3936 if (build_dhcpv6_action(
3937 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
3938 &options_action, &response_action)) {
3939 ds_clear(&match);
3940 ds_put_format(
3941 &match, "inport == %s && eth.src == %s"
3942 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
3943 " udp.dst == 547", op->json_key,
3944 op->lsp_addrs[i].ea_s);
3945
3946 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
3947 ds_cstr(&match), ds_cstr(&options_action));
3948
3949 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
3950 * put_dhcpv6_opts action is successful */
3951 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
3952 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
3953 ds_cstr(&match), ds_cstr(&response_action));
3954 ds_destroy(&options_action);
3955 ds_destroy(&response_action);
3956 break;
3957 }
3958 }
3959 }
3960 }
3961
3962 /* Logical switch ingress table 14 and 15: DNS lookup and response
3963 * priority 100 flows.
3964 */
3965 HMAP_FOR_EACH (od, key_node, datapaths) {
3966 if (!od->nbs || !ls_has_dns_records(od->nbs)) {
3967 continue;
3968 }
3969
3970 struct ds action = DS_EMPTY_INITIALIZER;
3971
3972 ds_clear(&match);
3973 ds_put_cstr(&match, "udp.dst == 53");
3974 ds_put_format(&action,
3975 REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
3976 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
3977 ds_cstr(&match), ds_cstr(&action));
3978 ds_clear(&action);
3979 ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
3980 ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
3981 "udp.dst = udp.src; udp.src = 53; outport = inport; "
3982 "flags.loopback = 1; output;");
3983 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
3984 ds_cstr(&match), ds_cstr(&action));
3985 ds_clear(&action);
3986 ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
3987 "udp.dst = udp.src; udp.src = 53; outport = inport; "
3988 "flags.loopback = 1; output;");
3989 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
3990 ds_cstr(&match), ds_cstr(&action));
3991 ds_destroy(&action);
3992 }
3993
3994 /* Ingress table 12 and 13: DHCP options and response, by default goto
3995 * next. (priority 0).
3996 * Ingress table 14 and 15: DNS lookup and response, by default goto next.
3997 * (priority 0).*/
3998
3999 HMAP_FOR_EACH (od, key_node, datapaths) {
4000 if (!od->nbs) {
4001 continue;
4002 }
4003
4004 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
4005 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
4006 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
4007 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
4008 }
4009
4010 /* Ingress table 16: Destination lookup, broadcast and multicast handling
4011 * (priority 100). */
4012 HMAP_FOR_EACH (op, key_node, ports) {
4013 if (!op->nbsp) {
4014 continue;
4015 }
4016
4017 if (lsp_is_enabled(op->nbsp)) {
4018 ovn_multicast_add(mcgroups, &mc_flood, op);
4019 }
4020 }
4021 HMAP_FOR_EACH (od, key_node, datapaths) {
4022 if (!od->nbs) {
4023 continue;
4024 }
4025
4026 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
4027 "outport = \""MC_FLOOD"\"; output;");
4028 }
4029
4030 /* Ingress table 16: Destination lookup, unicast handling (priority 50), */
4031 HMAP_FOR_EACH (op, key_node, ports) {
4032 if (!op->nbsp) {
4033 continue;
4034 }
4035
4036 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
4037 /* Addresses are owned by the logical port.
4038 * Ethernet address followed by zero or more IPv4
4039 * or IPv6 addresses (or both). */
4040 struct eth_addr mac;
4041 if (ovs_scan(op->nbsp->addresses[i],
4042 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4043 ds_clear(&match);
4044 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4045 ETH_ADDR_ARGS(mac));
4046
4047 ds_clear(&actions);
4048 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4049 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4050 ds_cstr(&match), ds_cstr(&actions));
4051 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
4052 if (lsp_is_enabled(op->nbsp)) {
4053 ovn_multicast_add(mcgroups, &mc_unknown, op);
4054 op->od->has_unknown = true;
4055 }
4056 } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
4057 if (!op->nbsp->dynamic_addresses
4058 || !ovs_scan(op->nbsp->dynamic_addresses,
4059 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4060 continue;
4061 }
4062 ds_clear(&match);
4063 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4064 ETH_ADDR_ARGS(mac));
4065
4066 ds_clear(&actions);
4067 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4068 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4069 ds_cstr(&match), ds_cstr(&actions));
4070 } else if (!strcmp(op->nbsp->addresses[i], "router")) {
4071 if (!op->peer || !op->peer->nbrp
4072 || !ovs_scan(op->peer->nbrp->mac,
4073 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4074 continue;
4075 }
4076 ds_clear(&match);
4077 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4078 ETH_ADDR_ARGS(mac));
4079 if (op->peer->od->l3dgw_port
4080 && op->peer == op->peer->od->l3dgw_port
4081 && op->peer->od->l3redirect_port) {
4082 /* The destination lookup flow for the router's
4083 * distributed gateway port MAC address should only be
4084 * programmed on the "redirect-chassis". */
4085 ds_put_format(&match, " && is_chassis_resident(%s)",
4086 op->peer->od->l3redirect_port->json_key);
4087 }
4088
4089 ds_clear(&actions);
4090 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4091 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4092 ds_cstr(&match), ds_cstr(&actions));
4093
4094 /* Add ethernet addresses specified in NAT rules on
4095 * distributed logical routers. */
4096 if (op->peer->od->l3dgw_port
4097 && op->peer == op->peer->od->l3dgw_port) {
4098 for (int j = 0; j < op->peer->od->nbr->n_nat; j++) {
4099 const struct nbrec_nat *nat
4100 = op->peer->od->nbr->nat[j];
4101 if (!strcmp(nat->type, "dnat_and_snat")
4102 && nat->logical_port && nat->external_mac
4103 && eth_addr_from_string(nat->external_mac, &mac)) {
4104
4105 ds_clear(&match);
4106 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
4107 " && is_chassis_resident(\"%s\")",
4108 ETH_ADDR_ARGS(mac),
4109 nat->logical_port);
4110
4111 ds_clear(&actions);
4112 ds_put_format(&actions, "outport = %s; output;",
4113 op->json_key);
4114 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
4115 50, ds_cstr(&match),
4116 ds_cstr(&actions));
4117 }
4118 }
4119 }
4120 } else {
4121 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4122
4123 VLOG_INFO_RL(&rl,
4124 "%s: invalid syntax '%s' in addresses column",
4125 op->nbsp->name, op->nbsp->addresses[i]);
4126 }
4127 }
4128 }
4129
4130 /* Ingress table 16: Destination lookup for unknown MACs (priority 0). */
4131 HMAP_FOR_EACH (od, key_node, datapaths) {
4132 if (!od->nbs) {
4133 continue;
4134 }
4135
4136 if (od->has_unknown) {
4137 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
4138 "outport = \""MC_UNKNOWN"\"; output;");
4139 }
4140 }
4141
4142 /* Egress tables 8: Egress port security - IP (priority 0)
4143 * Egress table 9: Egress port security L2 - multicast/broadcast
4144 * (priority 100). */
4145 HMAP_FOR_EACH (od, key_node, datapaths) {
4146 if (!od->nbs) {
4147 continue;
4148 }
4149
4150 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
4151 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
4152 "output;");
4153 }
4154
4155 /* Egress table 8: Egress port security - IP (priorities 90 and 80)
4156 * if port security enabled.
4157 *
4158 * Egress table 9: Egress port security - L2 (priorities 50 and 150).
4159 *
4160 * Priority 50 rules implement port security for enabled logical port.
4161 *
4162 * Priority 150 rules drop packets to disabled logical ports, so that they
4163 * don't even receive multicast or broadcast packets. */
4164 HMAP_FOR_EACH (op, key_node, ports) {
4165 if (!op->nbsp) {
4166 continue;
4167 }
4168
4169 ds_clear(&match);
4170 ds_put_format(&match, "outport == %s", op->json_key);
4171 if (lsp_is_enabled(op->nbsp)) {
4172 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
4173 &match);
4174 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
4175 ds_cstr(&match), "output;");
4176 } else {
4177 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
4178 ds_cstr(&match), "drop;");
4179 }
4180
4181 if (op->nbsp->n_port_security) {
4182 build_port_security_ip(P_OUT, op, lflows);
4183 }
4184 }
4185
4186 ds_destroy(&match);
4187 ds_destroy(&actions);
4188 }
4189
4190 static bool
4191 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
4192 {
4193 return !lrport->enabled || *lrport->enabled;
4194 }
4195
4196 /* Returns a string of the IP address of the router port 'op' that
4197 * overlaps with 'ip_s". If one is not found, returns NULL.
4198 *
4199 * The caller must not free the returned string. */
4200 static const char *
4201 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
4202 {
4203 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4204
4205 if (is_ipv4) {
4206 ovs_be32 ip;
4207
4208 if (!ip_parse(ip_s, &ip)) {
4209 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4210 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
4211 return NULL;
4212 }
4213
4214 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4215 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
4216
4217 if (!((na->network ^ ip) & na->mask)) {
4218 /* There should be only 1 interface that matches the
4219 * supplied IP. Otherwise, it's a configuration error,
4220 * because subnets of a router's interfaces should NOT
4221 * overlap. */
4222 return na->addr_s;
4223 }
4224 }
4225 } else {
4226 struct in6_addr ip6;
4227
4228 if (!ipv6_parse(ip_s, &ip6)) {
4229 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4230 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
4231 return NULL;
4232 }
4233
4234 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4235 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
4236 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
4237 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
4238
4239 if (ipv6_is_zero(&and_addr)) {
4240 /* There should be only 1 interface that matches the
4241 * supplied IP. Otherwise, it's a configuration error,
4242 * because subnets of a router's interfaces should NOT
4243 * overlap. */
4244 return na->addr_s;
4245 }
4246 }
4247 }
4248
4249 return NULL;
4250 }
4251
4252 static void
4253 add_route(struct hmap *lflows, const struct ovn_port *op,
4254 const char *lrp_addr_s, const char *network_s, int plen,
4255 const char *gateway, const char *policy)
4256 {
4257 bool is_ipv4 = strchr(network_s, '.') ? true : false;
4258 struct ds match = DS_EMPTY_INITIALIZER;
4259 const char *dir;
4260 uint16_t priority;
4261
4262 if (policy && !strcmp(policy, "src-ip")) {
4263 dir = "src";
4264 priority = plen * 2;
4265 } else {
4266 dir = "dst";
4267 priority = (plen * 2) + 1;
4268 }
4269
4270 /* IPv6 link-local addresses must be scoped to the local router port. */
4271 if (!is_ipv4) {
4272 struct in6_addr network;
4273 ovs_assert(ipv6_parse(network_s, &network));
4274 if (in6_is_lla(&network)) {
4275 ds_put_format(&match, "inport == %s && ", op->json_key);
4276 }
4277 }
4278 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
4279 network_s, plen);
4280
4281 struct ds actions = DS_EMPTY_INITIALIZER;
4282 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
4283
4284 if (gateway) {
4285 ds_put_cstr(&actions, gateway);
4286 } else {
4287 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
4288 }
4289 ds_put_format(&actions, "; "
4290 "%sreg1 = %s; "
4291 "eth.src = %s; "
4292 "outport = %s; "
4293 "flags.loopback = 1; "
4294 "next;",
4295 is_ipv4 ? "" : "xx",
4296 lrp_addr_s,
4297 op->lrp_networks.ea_s,
4298 op->json_key);
4299
4300 /* The priority here is calculated to implement longest-prefix-match
4301 * routing. */
4302 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
4303 ds_cstr(&match), ds_cstr(&actions));
4304 ds_destroy(&match);
4305 ds_destroy(&actions);
4306 }
4307
4308 static void
4309 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
4310 struct hmap *ports,
4311 const struct nbrec_logical_router_static_route *route)
4312 {
4313 ovs_be32 nexthop;
4314 const char *lrp_addr_s = NULL;
4315 unsigned int plen;
4316 bool is_ipv4;
4317
4318 /* Verify that the next hop is an IP address with an all-ones mask. */
4319 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
4320 if (!error) {
4321 if (plen != 32) {
4322 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4323 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4324 return;
4325 }
4326 is_ipv4 = true;
4327 } else {
4328 free(error);
4329
4330 struct in6_addr ip6;
4331 error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
4332 if (!error) {
4333 if (plen != 128) {
4334 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4335 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4336 return;
4337 }
4338 is_ipv4 = false;
4339 } else {
4340 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4341 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
4342 free(error);
4343 return;
4344 }
4345 }
4346
4347 char *prefix_s;
4348 if (is_ipv4) {
4349 ovs_be32 prefix;
4350 /* Verify that ip prefix is a valid IPv4 address. */
4351 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
4352 if (error) {
4353 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4354 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4355 route->ip_prefix);
4356 free(error);
4357 return;
4358 }
4359 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
4360 } else {
4361 /* Verify that ip prefix is a valid IPv6 address. */
4362 struct in6_addr prefix;
4363 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
4364 if (error) {
4365 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4366 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4367 route->ip_prefix);
4368 free(error);
4369 return;
4370 }
4371 struct in6_addr mask = ipv6_create_mask(plen);
4372 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
4373 prefix_s = xmalloc(INET6_ADDRSTRLEN);
4374 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
4375 }
4376
4377 /* Find the outgoing port. */
4378 struct ovn_port *out_port = NULL;
4379 if (route->output_port) {
4380 out_port = ovn_port_find(ports, route->output_port);
4381 if (!out_port) {
4382 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4383 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
4384 route->output_port, route->ip_prefix);
4385 goto free_prefix_s;
4386 }
4387 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4388 if (!lrp_addr_s) {
4389 /* There are no IP networks configured on the router's port via
4390 * which 'route->nexthop' is theoretically reachable. But since
4391 * 'out_port' has been specified, we honor it by trying to reach
4392 * 'route->nexthop' via the first IP address of 'out_port'.
4393 * (There are cases, e.g in GCE, where each VM gets a /32 IP
4394 * address and the default gateway is still reachable from it.) */
4395 if (is_ipv4) {
4396 if (out_port->lrp_networks.n_ipv4_addrs) {
4397 lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
4398 }
4399 } else {
4400 if (out_port->lrp_networks.n_ipv6_addrs) {
4401 lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
4402 }
4403 }
4404 }
4405 } else {
4406 /* output_port is not specified, find the
4407 * router port matching the next hop. */
4408 int i;
4409 for (i = 0; i < od->nbr->n_ports; i++) {
4410 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
4411 out_port = ovn_port_find(ports, lrp->name);
4412 if (!out_port) {
4413 /* This should not happen. */
4414 continue;
4415 }
4416
4417 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4418 if (lrp_addr_s) {
4419 break;
4420 }
4421 }
4422 }
4423
4424 if (!out_port || !lrp_addr_s) {
4425 /* There is no matched out port. */
4426 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4427 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
4428 route->ip_prefix, route->nexthop);
4429 goto free_prefix_s;
4430 }
4431
4432 char *policy = route->policy ? route->policy : "dst-ip";
4433 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
4434 policy);
4435
4436 free_prefix_s:
4437 free(prefix_s);
4438 }
4439
4440 static void
4441 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4442 {
4443 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
4444 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
4445 return;
4446 }
4447
4448 ds_put_cstr(ds, "{");
4449 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4450 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
4451 if (add_bcast) {
4452 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4453 }
4454 }
4455 ds_chomp(ds, ' ');
4456 ds_chomp(ds, ',');
4457 ds_put_cstr(ds, "}");
4458 }
4459
4460 static void
4461 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4462 {
4463 if (op->lrp_networks.n_ipv6_addrs == 1) {
4464 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4465 return;
4466 }
4467
4468 ds_put_cstr(ds, "{");
4469 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4470 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4471 }
4472 ds_chomp(ds, ' ');
4473 ds_chomp(ds, ',');
4474 ds_put_cstr(ds, "}");
4475 }
4476
4477 static const char *
4478 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
4479 {
4480 char *key = xasprintf("%s_force_snat_ip", key_type);
4481 const char *ip_address = smap_get(&od->nbr->options, key);
4482 free(key);
4483
4484 if (ip_address) {
4485 ovs_be32 mask;
4486 char *error = ip_parse_masked(ip_address, ip, &mask);
4487 if (error || mask != OVS_BE32_MAX) {
4488 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4489 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
4490 ip_address, UUID_ARGS(&od->key));
4491 free(error);
4492 *ip = 0;
4493 return NULL;
4494 }
4495 return ip_address;
4496 }
4497
4498 *ip = 0;
4499 return NULL;
4500 }
4501
/* Adds to 'lflows' the router ingress DNAT flows that implement one load
 * balancer VIP on logical router 'od', and, when 'od' has a distributed
 * gateway port, the egress UNDNAT flows for the reverse traffic.
 *
 * 'match' selects traffic destined to the VIP and 'actions' performs the
 * load balancing; both are built by the caller and only read here.
 * 'backend_ips' is the load balancer's comma-separated "ip[:port]" backend
 * list (may be NULL).  If 'lb_force_snat_ip' is nonnull, the generated
 * flows also set flags.force_snat_for_lb so replies get SNATed. */
static void
add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
                   struct ds *match, struct ds *actions, int priority,
                   const char *lb_force_snat_ip, char *backend_ips,
                   bool is_udp, int addr_family)
{
    /* A match and actions for new connections. */
    char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
    if (lb_force_snat_ip) {
        char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
                                      ds_cstr(actions));
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
                      new_actions);
        free(new_actions);
    } else {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
                      ds_cstr(actions));
    }

    /* A match and actions for established connections. */
    char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
    if (lb_force_snat_ip) {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
                      "flags.force_snat_for_lb = 1; ct_dnat;");
    } else {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
                      "ct_dnat;");
    }

    free(new_match);
    free(est_match);

    /* UNDNAT flows below are only generated for IPv4 load balancers on
     * routers that have a distributed gateway port with a redirect port. */
    if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips
        || addr_family != AF_INET) {
        return;
    }

    /* Add logical flows to UNDNAT the load balanced reverse traffic in
     * the router egress pipeline stage - S_ROUTER_OUT_UNDNAT if the logical
     * router has a gateway router port associated.
     */
    struct ds undnat_match = DS_EMPTY_INITIALIZER;
    ds_put_cstr(&undnat_match, "ip4 && (");
    /* Walk 'backend_ips' destructively with strsep() on a private copy;
     * 'start' keeps the original allocation so it can be freed. */
    char *start, *next, *ip_str;
    start = next = xstrdup(backend_ips);
    ip_str = strsep(&next, ",");
    bool backend_ips_found = false;
    while (ip_str && ip_str[0]) {
        char *ip_address = NULL;
        uint16_t port = 0;
        int addr_family_;
        ip_address_and_port_from_lb_key(ip_str, &ip_address, &port,
                                        &addr_family_);
        if (!ip_address) {
            /* Unparseable backend entry; stop scanning the list. */
            break;
        }

        /* Each backend contributes one "(ip4.src == IP [&& L4.src == PORT])"
         * clause, joined with " || ". */
        ds_put_format(&undnat_match, "(ip4.src == %s", ip_address);
        free(ip_address);
        if (port) {
            ds_put_format(&undnat_match, " && %s.src == %d) || ",
                          is_udp ? "udp" : "tcp", port);
        } else {
            ds_put_cstr(&undnat_match, ") || ");
        }
        ip_str = strsep(&next, ",");
        backend_ips_found = true;
    }

    free(start);
    if (!backend_ips_found) {
        ds_destroy(&undnat_match);
        return;
    }
    /* Remove the trailing " || " separator left by the loop. */
    ds_chomp(&undnat_match, ' ');
    ds_chomp(&undnat_match, '|');
    ds_chomp(&undnat_match, '|');
    ds_chomp(&undnat_match, ' ');
    ds_put_format(&undnat_match, ") && outport == %s && "
                  "is_chassis_resident(%s)", od->l3dgw_port->json_key,
                  od->l3redirect_port->json_key);
    if (lb_force_snat_ip) {
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
                      ds_cstr(&undnat_match),
                      "flags.force_snat_for_lb = 1; ct_dnat;");
    } else {
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
                      ds_cstr(&undnat_match), "ct_dnat;");
    }

    ds_destroy(&undnat_match);
}
4594
4595 #define ND_RA_MAX_INTERVAL_MAX 1800
4596 #define ND_RA_MAX_INTERVAL_MIN 4
4597
4598 #define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
4599 #define ND_RA_MIN_INTERVAL_MIN 3
4600
4601 static void
4602 copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
4603 {
4604 struct smap options;
4605 smap_clone(&options, &op->sb->options);
4606
4607 smap_add(&options, "ipv6_ra_send_periodic", "true");
4608 smap_add(&options, "ipv6_ra_address_mode", address_mode);
4609
4610 int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
4611 "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
4612 if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
4613 max_interval = ND_RA_MAX_INTERVAL_MAX;
4614 }
4615 if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
4616 max_interval = ND_RA_MAX_INTERVAL_MIN;
4617 }
4618 smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval);
4619
4620 int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
4621 "min_interval", nd_ra_min_interval_default(max_interval));
4622 if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
4623 min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
4624 }
4625 if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
4626 min_interval = ND_RA_MIN_INTERVAL_MIN;
4627 }
4628 smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval);
4629
4630 int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", ND_MTU_DEFAULT);
4631 /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */
4632 if (mtu && mtu >= 1280) {
4633 smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
4634 }
4635
4636 struct ds s = DS_EMPTY_INITIALIZER;
4637 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
4638 struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
4639 if (in6_is_lla(&addrs->network)) {
4640 smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
4641 continue;
4642 }
4643 ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
4644 }
4645 /* Remove trailing space */
4646 ds_chomp(&s, ' ');
4647 smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
4648 ds_destroy(&s);
4649
4650 smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
4651
4652 sbrec_port_binding_set_options(op->sb, &options);
4653 smap_destroy(&options);
4654 }
4655
4656 static void
4657 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
4658 struct hmap *lflows)
4659 {
4660 /* This flow table structure is documented in ovn-northd(8), so please
4661 * update ovn-northd.8.xml if you change anything. */
4662
4663 struct ds match = DS_EMPTY_INITIALIZER;
4664 struct ds actions = DS_EMPTY_INITIALIZER;
4665
4666 /* Logical router ingress table 0: Admission control framework. */
4667 struct ovn_datapath *od;
4668 HMAP_FOR_EACH (od, key_node, datapaths) {
4669 if (!od->nbr) {
4670 continue;
4671 }
4672
4673 /* Logical VLANs not supported.
4674 * Broadcast/multicast source address is invalid. */
4675 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
4676 "vlan.present || eth.src[40]", "drop;");
4677 }
4678
4679 /* Logical router ingress table 0: match (priority 50). */
4680 struct ovn_port *op;
4681 HMAP_FOR_EACH (op, key_node, ports) {
4682 if (!op->nbrp) {
4683 continue;
4684 }
4685
4686 if (!lrport_is_enabled(op->nbrp)) {
4687 /* Drop packets from disabled logical ports (since logical flow
4688 * tables are default-drop). */
4689 continue;
4690 }
4691
4692 if (op->derived) {
4693 /* No ingress packets should be received on a chassisredirect
4694 * port. */
4695 continue;
4696 }
4697
4698 ds_clear(&match);
4699 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
4700 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4701 ds_cstr(&match), "next;");
4702
4703 ds_clear(&match);
4704 ds_put_format(&match, "eth.dst == %s && inport == %s",
4705 op->lrp_networks.ea_s, op->json_key);
4706 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4707 && op->od->l3redirect_port) {
4708 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
4709 * should only be received on the "redirect-chassis". */
4710 ds_put_format(&match, " && is_chassis_resident(%s)",
4711 op->od->l3redirect_port->json_key);
4712 }
4713 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4714 ds_cstr(&match), "next;");
4715 }
4716
    /* Logical router ingress table 1: IP Input.
     *
     * Per-datapath flows: basic L3 sanity checks, ARP/ND learning, and the
     * default "continue to routing" flow. */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        /* L3 admission control: drop multicast and broadcast source, localhost
         * source or destination, and zero network source or destination
         * (priority 100). */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
                      "ip4.mcast || "
                      "ip4.src == 255.255.255.255 || "
                      "ip4.src == 127.0.0.0/8 || "
                      "ip4.dst == 127.0.0.0/8 || "
                      "ip4.src == 0.0.0.0/8 || "
                      "ip4.dst == 0.0.0.0/8",
                      "drop;");

        /* ARP reply handling.  Use ARP replies to populate the logical
         * router's ARP table. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
                      "put_arp(inport, arp.spa, arp.sha);");

        /* Drop Ethernet local broadcast.  By definition this traffic should
         * not be forwarded. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
                      "eth.bcast", "drop;");

        /* TTL discard.
         *
         * XXX Need to send ICMP time exceeded if !ip.later_frag. */
        ds_clear(&match);
        ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
                      ds_cstr(&match), "drop;");

        /* ND advertisement handling.  Use advertisements to populate
         * the logical router's ARP/ND table. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
                      "put_nd(inport, nd.target, nd.tll);");

        /* Learn from neighbor solicitations that were not directed at
         * us.  (A priority-90 flow will respond to requests to us and
         * learn the sender's mac address.) */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
                      "put_nd(inport, ip6.src, nd.sll);");

        /* Pass other traffic not already handled to the next table for
         * routing. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
    }
4768
    /* Logical router ingress table 1: IP Input for IPv4.
     *
     * Per-port IPv4 flows: self-originated-source drop, ICMP echo replies,
     * ARP responder flows for the port's own addresses, load-balancer VIPs,
     * and NAT external IPs, plus a final drop of other traffic addressed
     * directly to the router. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbrp) {
            continue;
        }

        if (op->derived) {
            /* No ingress packets are accepted on a chassisredirect
             * port, so no need to program flows for that port. */
            continue;
        }

        if (op->lrp_networks.n_ipv4_addrs) {
            /* L3 admission control: drop packets that originate from an
             * IPv4 address owned by the router or a broadcast address
             * known to the router (priority 100).  Packets looped back by
             * the egress-loopback stage legitimately carry such a source,
             * so they are exempted via REGBIT_EGRESS_LOOPBACK. */
            ds_clear(&match);
            ds_put_cstr(&match, "ip4.src == ");
            op_put_v4_networks(&match, op, true);
            ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
                          ds_cstr(&match), "drop;");

            /* ICMP echo reply.  These flows reply to ICMP echo requests
             * received for the router's IP address.  Since packets only
             * get here as part of the logical router datapath, the inport
             * (i.e. the incoming locally attached net) does not matter.
             * The ip.ttl also does not matter (RFC1812 section 4.2.2.9). */
            ds_clear(&match);
            ds_put_cstr(&match, "ip4.dst == ");
            op_put_v4_networks(&match, op, false);
            ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");

            ds_clear(&actions);
            ds_put_format(&actions,
                "ip4.dst <-> ip4.src; "
                "ip.ttl = 255; "
                "icmp4.type = 0; "
                "flags.loopback = 1; "
                "next; ");
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        /* ARP reply.  These flows reply to ARP requests for the router's own
         * IP address. */
        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            ds_clear(&match);
            ds_put_format(&match,
                          "inport == %s && arp.tpa == %s && arp.op == 1",
                          op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
            if (op->od->l3dgw_port && op == op->od->l3dgw_port
                && op->od->l3redirect_port) {
                /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
                 * should only be sent from the "redirect-chassis", so that
                 * upstream MAC learning points to the "redirect-chassis".
                 * Also need to avoid generation of multiple ARP responses
                 * from different chassis. */
                ds_put_format(&match, " && is_chassis_resident(%s)",
                              op->od->l3redirect_port->json_key);
            }

            ds_clear(&actions);
            ds_put_format(&actions,
                "eth.dst = eth.src; "
                "eth.src = %s; "
                "arp.op = 2; /* ARP reply */ "
                "arp.tha = arp.sha; "
                "arp.sha = %s; "
                "arp.tpa = arp.spa; "
                "arp.spa = %s; "
                "outport = %s; "
                "flags.loopback = 1; "
                "output;",
                op->lrp_networks.ea_s,
                op->lrp_networks.ea_s,
                op->lrp_networks.ipv4_addrs[i].addr_s,
                op->json_key);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        /* A set to hold all load-balancer vips that need ARP responses. */
        struct sset all_ips = SSET_INITIALIZER(&all_ips);
        int addr_family;
        get_router_load_balancer_ips(op->od, &all_ips, &addr_family);

        /* Respond to ARP (IPv4) or ND (IPv6) requests for each VIP using
         * this port's MAC, so neighbors can resolve load-balanced
         * addresses that the router itself owns. */
        const char *ip_address;
        SSET_FOR_EACH(ip_address, &all_ips) {
            ds_clear(&match);
            if (addr_family == AF_INET) {
                ds_put_format(&match,
                              "inport == %s && arp.tpa == %s && arp.op == 1",
                              op->json_key, ip_address);
            } else {
                ds_put_format(&match,
                              "inport == %s && nd_ns && nd.target == %s",
                              op->json_key, ip_address);
            }

            ds_clear(&actions);
            if (addr_family == AF_INET) {
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = %s; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lrp_networks.ea_s,
                    op->lrp_networks.ea_s,
                    ip_address,
                    op->json_key);
            } else {
                ds_put_format(&actions,
                    "nd_na { "
                    "eth.src = %s; "
                    "ip6.src = %s; "
                    "nd.target = %s; "
                    "nd.tll = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output; "
                    "};",
                    op->lrp_networks.ea_s,
                    ip_address,
                    ip_address,
                    op->lrp_networks.ea_s);
            }
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        sset_destroy(&all_ips);

        /* A gateway router can have 2 SNAT IP addresses to force DNATed and
         * LBed traffic respectively to be SNATed.  In addition, there can be
         * a number of SNAT rules in the NAT table.
         * (Hence the "+ 2" in the allocation size below.) */
        ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
                                     (op->od->nbr->n_nat + 2));
        size_t n_snat_ips = 0;

        ovs_be32 snat_ip;
        const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
                                                           &snat_ip);
        if (dnat_force_snat_ip) {
            snat_ips[n_snat_ips++] = snat_ip;
        }

        const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
                                                         &snat_ip);
        if (lb_force_snat_ip) {
            snat_ips[n_snat_ips++] = snat_ip;
        }

        /* NOTE(review): loop index is 'int' while n_nat is presumably a
         * size_t — harmless for realistic NAT counts but triggers
         * sign-compare warnings; confirm upstream convention. */
        for (int i = 0; i < op->od->nbr->n_nat; i++) {
            const struct nbrec_nat *nat;

            nat = op->od->nbr->nat[i];

            ovs_be32 ip;
            if (!ip_parse(nat->external_ip, &ip) || !ip) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
                             "for router %s", nat->external_ip, op->key);
                continue;
            }

            if (!strcmp(nat->type, "snat")) {
                snat_ips[n_snat_ips++] = ip;
                continue;
            }

            /* ARP handling for external IP addresses.
             *
             * DNAT IP addresses are external IP addresses that need ARP
             * handling. */
            ds_clear(&match);
            ds_put_format(&match,
                          "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
                          op->json_key, IP_ARGS(ip));

            ds_clear(&actions);
            ds_put_format(&actions,
                "eth.dst = eth.src; "
                "arp.op = 2; /* ARP reply */ "
                "arp.tha = arp.sha; ");

            if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
                struct eth_addr mac;
                if (nat->external_mac &&
                    eth_addr_from_string(nat->external_mac, &mac)
                    && nat->logical_port) {
                    /* distributed NAT case, use nat->external_mac */
                    ds_put_format(&actions,
                                  "eth.src = "ETH_ADDR_FMT"; "
                                  "arp.sha = "ETH_ADDR_FMT"; ",
                                  ETH_ADDR_ARGS(mac),
                                  ETH_ADDR_ARGS(mac));
                    /* Traffic with eth.src = nat->external_mac should only be
                     * sent from the chassis where nat->logical_port is
                     * resident, so that upstream MAC learning points to the
                     * correct chassis.  Also need to avoid generation of
                     * multiple ARP responses from different chassis. */
                    ds_put_format(&match, " && is_chassis_resident(\"%s\")",
                                  nat->logical_port);
                } else {
                    ds_put_format(&actions,
                                  "eth.src = %s; "
                                  "arp.sha = %s; ",
                                  op->lrp_networks.ea_s,
                                  op->lrp_networks.ea_s);
                    /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
                     * should only be sent from the "redirect-chassis", so that
                     * upstream MAC learning points to the "redirect-chassis".
                     * Also need to avoid generation of multiple ARP responses
                     * from different chassis. */
                    if (op->od->l3redirect_port) {
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      op->od->l3redirect_port->json_key);
                    }
                }
            } else {
                ds_put_format(&actions,
                              "eth.src = %s; "
                              "arp.sha = %s; ",
                              op->lrp_networks.ea_s,
                              op->lrp_networks.ea_s);
            }
            ds_put_format(&actions,
                          "arp.tpa = arp.spa; "
                          "arp.spa = "IP_FMT"; "
                          "outport = %s; "
                          "flags.loopback = 1; "
                          "output;",
                          IP_ARGS(ip),
                          op->json_key);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        /* Build an "ip4.dst == {a, b, ...}" match of the port's own
         * addresses, excluding any address that is also a (force-)SNAT IP
         * (packets to SNAT IPs must not be dropped), and drop other IP
         * traffic addressed directly to the router. */
        ds_clear(&match);
        ds_put_cstr(&match, "ip4.dst == {");
        bool has_drop_ips = false;
        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            bool snat_ip_is_router_ip = false;
            for (int j = 0; j < n_snat_ips; j++) {
                /* Packets to SNAT IPs should not be dropped. */
                if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
                    snat_ip_is_router_ip = true;
                    break;
                }
            }
            if (snat_ip_is_router_ip) {
                continue;
            }
            ds_put_format(&match, "%s, ",
                          op->lrp_networks.ipv4_addrs[i].addr_s);
            has_drop_ips = true;
        }
        /* Trim the trailing ", " left by the loop above. */
        ds_chomp(&match, ' ');
        ds_chomp(&match, ',');
        ds_put_cstr(&match, "}");

        if (has_drop_ips) {
            /* Drop IP traffic to this router. */
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
                          ds_cstr(&match), "drop;");
        }

        free(snat_ips);
    }
5045
    /* Logical router ingress table 1: IP Input for IPv6.
     *
     * Per-port IPv6 flows: self-originated-source drop, ICMPv6 echo
     * replies, drop of other traffic to the router, and ND responders for
     * the port's own addresses. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbrp) {
            continue;
        }

        if (op->derived) {
            /* No ingress packets are accepted on a chassisredirect
             * port, so no need to program flows for that port. */
            continue;
        }

        if (op->lrp_networks.n_ipv6_addrs) {
            /* L3 admission control: drop packets that originate from an
             * IPv6 address owned by the router (priority 100). */
            ds_clear(&match);
            ds_put_cstr(&match, "ip6.src == ");
            op_put_v6_networks(&match, op);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
                          ds_cstr(&match), "drop;");

            /* ICMPv6 echo reply.  These flows reply to echo requests
             * received for the router's IP address. */
            ds_clear(&match);
            ds_put_cstr(&match, "ip6.dst == ");
            op_put_v6_networks(&match, op);
            ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");

            ds_clear(&actions);
            ds_put_cstr(&actions,
                        "ip6.dst <-> ip6.src; "
                        "ip.ttl = 255; "
                        "icmp6.type = 129; "
                        "flags.loopback = 1; "
                        "next; ");
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));

            /* Drop IPv6 traffic to this router.  (Lower priority than the
             * echo-reply flow above, so pings still work.) */
            ds_clear(&match);
            ds_put_cstr(&match, "ip6.dst == ");
            op_put_v6_networks(&match, op);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
                          ds_cstr(&match), "drop;");
        }

        /* ND reply.  These flows reply to ND solicitations for the
         * router's own IP address, matching both the address itself and
         * its solicited-node multicast address. */
        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
            ds_clear(&match);
            ds_put_format(&match,
                          "inport == %s && nd_ns && ip6.dst == {%s, %s} "
                          "&& nd.target == %s",
                          op->json_key,
                          op->lrp_networks.ipv6_addrs[i].addr_s,
                          op->lrp_networks.ipv6_addrs[i].sn_addr_s,
                          op->lrp_networks.ipv6_addrs[i].addr_s);
            if (op->od->l3dgw_port && op == op->od->l3dgw_port
                && op->od->l3redirect_port) {
                /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
                 * should only be sent from the "redirect-chassis", so that
                 * upstream MAC learning points to the "redirect-chassis".
                 * Also need to avoid generation of multiple ND replies
                 * from different chassis. */
                ds_put_format(&match, " && is_chassis_resident(%s)",
                              op->od->l3redirect_port->json_key);
            }

            ds_clear(&actions);
            ds_put_format(&actions,
                          "put_nd(inport, ip6.src, nd.sll); "
                          "nd_na { "
                          "eth.src = %s; "
                          "ip6.src = %s; "
                          "nd.target = %s; "
                          "nd.tll = %s; "
                          "outport = inport; "
                          "flags.loopback = 1; "
                          "output; "
                          "};",
                          op->lrp_networks.ea_s,
                          op->lrp_networks.ipv6_addrs[i].addr_s,
                          op->lrp_networks.ipv6_addrs[i].addr_s,
                          op->lrp_networks.ea_s);
            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
                          ds_cstr(&match), ds_cstr(&actions));
        }
    }
5134
    /* NAT, Defrag and load balancing.
     *
     * For each router datapath: install default "next;" flows for the NAT
     * and defrag stages, then (for Gateway routers and routers with a
     * distributed gateway port only) program per-NAT-rule UNSNAT / DNAT /
     * UNDNAT / SNAT flows, force-SNAT handling, and load-balancer
     * defrag + DNAT flows. */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbr) {
            continue;
        }

        /* Packets are allowed by default. */
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");

        /* NAT rules are only valid on Gateway routers and routers with
         * l3dgw_port (router has a port with "redirect-chassis"
         * specified). */
        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
            continue;
        }

        ovs_be32 snat_ip;
        const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
                                                           &snat_ip);
        const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
                                                         &snat_ip);

        for (int i = 0; i < od->nbr->n_nat; i++) {
            const struct nbrec_nat *nat;

            nat = od->nbr->nat[i];

            ovs_be32 ip, mask;

            /* external_ip must be a plain host address (/32). */
            char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
            if (error || mask != OVS_BE32_MAX) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad external ip %s for nat",
                             nat->external_ip);
                free(error);
                continue;
            }

            /* Check the validity of nat->logical_ip. 'logical_ip' can
             * be a subnet when the type is "snat". */
            error = ip_parse_masked(nat->logical_ip, &ip, &mask);
            if (!strcmp(nat->type, "snat")) {
                if (error) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
                                 "in router "UUID_FMT"",
                                 nat->logical_ip, UUID_ARGS(&od->key));
                    free(error);
                    continue;
                }
            } else {
                if (error || mask != OVS_BE32_MAX) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
                        ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
                    free(error);
                    continue;
                }
            }

            /* For distributed router NAT, determine whether this NAT rule
             * satisfies the conditions for distributed NAT processing. */
            bool distributed = false;
            struct eth_addr mac;
            if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
                nat->logical_port && nat->external_mac) {
                if (eth_addr_from_string(nat->external_mac, &mac)) {
                    distributed = true;
                } else {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
                        ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
                    continue;
                }
            }

            /* Ingress UNSNAT table: It is for already established connections'
             * reverse traffic. i.e., SNAT has already been done in egress
             * pipeline and now the packet has entered the ingress pipeline as
             * part of a reply. We undo the SNAT here.
             *
             * Undoing SNAT has to happen before DNAT processing. This is
             * because when the packet was DNATed in ingress pipeline, it did
             * not know about the possibility of eventual additional SNAT in
             * egress pipeline. */
            if (!strcmp(nat->type, "snat")
                || !strcmp(nat->type, "dnat_and_snat")) {
                if (!od->l3dgw_port) {
                    /* Gateway router. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
                                  ds_cstr(&match), "ct_snat;");
                } else {
                    /* Distributed router. */

                    /* Traffic received on l3dgw_port is subject to NAT. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s"
                                          " && inport == %s",
                                  nat->external_ip,
                                  od->l3dgw_port->json_key);
                    if (!distributed && od->l3redirect_port) {
                        /* Flows for NAT rules that are centralized are only
                         * programmed on the "redirect-chassis". */
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      od->l3redirect_port->json_key);
                    }
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
                                  ds_cstr(&match), "ct_snat;");

                    /* Traffic received on other router ports must be
                     * redirected to the central instance of the l3dgw_port
                     * for NAT processing. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
                                  ds_cstr(&match),
                                  REGBIT_NAT_REDIRECT" = 1; next;");
                }
            }

            /* Ingress DNAT table: Packets enter the pipeline with destination
             * IP address that needs to be DNATted from a external IP address
             * to a logical IP address. */
            if (!strcmp(nat->type, "dnat")
                || !strcmp(nat->type, "dnat_and_snat")) {
                if (!od->l3dgw_port) {
                    /* Gateway router. */
                    /* Packet when it goes from the initiator to destination.
                     * We need to set flags.loopback because the router can
                     * send the packet back through the same interface. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ds_clear(&actions);
                    if (dnat_force_snat_ip) {
                        /* Indicate to the future tables that a DNAT has taken
                         * place and a force SNAT needs to be done in the
                         * Egress SNAT table. */
                        ds_put_format(&actions,
                                      "flags.force_snat_for_dnat = 1; ");
                    }
                    ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
                                  nat->logical_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
                                  ds_cstr(&match), ds_cstr(&actions));
                } else {
                    /* Distributed router. */

                    /* Traffic received on l3dgw_port is subject to NAT. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s"
                                          " && inport == %s",
                                  nat->external_ip,
                                  od->l3dgw_port->json_key);
                    if (!distributed && od->l3redirect_port) {
                        /* Flows for NAT rules that are centralized are only
                         * programmed on the "redirect-chassis". */
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      od->l3redirect_port->json_key);
                    }
                    ds_clear(&actions);
                    ds_put_format(&actions, "ct_dnat(%s);",
                                  nat->logical_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
                                  ds_cstr(&match), ds_cstr(&actions));

                    /* Traffic received on other router ports must be
                     * redirected to the central instance of the l3dgw_port
                     * for NAT processing. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  nat->external_ip);
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
                                  ds_cstr(&match),
                                  REGBIT_NAT_REDIRECT" = 1; next;");
                }
            }

            /* Egress UNDNAT table: It is for already established connections'
             * reverse traffic. i.e., DNAT has already been done in ingress
             * pipeline and now the packet has entered the egress pipeline as
             * part of a reply. We undo the DNAT here.
             *
             * Note that this only applies for NAT on a distributed router.
             * Undo DNAT on a gateway router is done in the ingress DNAT
             * pipeline stage. */
            if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
                || !strcmp(nat->type, "dnat_and_snat"))) {
                ds_clear(&match);
                ds_put_format(&match, "ip && ip4.src == %s"
                                      " && outport == %s",
                              nat->logical_ip,
                              od->l3dgw_port->json_key);
                if (!distributed && od->l3redirect_port) {
                    /* Flows for NAT rules that are centralized are only
                     * programmed on the "redirect-chassis". */
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  od->l3redirect_port->json_key);
                }
                ds_clear(&actions);
                if (distributed) {
                    /* 'mac' was validated above when 'distributed' was set. */
                    ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
                                  ETH_ADDR_ARGS(mac));
                }
                ds_put_format(&actions, "ct_dnat;");
                ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
                              ds_cstr(&match), ds_cstr(&actions));
            }

            /* Egress SNAT table: Packets enter the egress pipeline with
             * source ip address that needs to be SNATted to a external ip
             * address. */
            if (!strcmp(nat->type, "snat")
                || !strcmp(nat->type, "dnat_and_snat")) {
                if (!od->l3dgw_port) {
                    /* Gateway router. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.src == %s",
                                  nat->logical_ip);
                    ds_clear(&actions);
                    ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);

                    /* The priority here is calculated such that the
                     * nat->logical_ip with the longest mask gets a higher
                     * priority. */
                    ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
                                  count_1bits(ntohl(mask)) + 1,
                                  ds_cstr(&match), ds_cstr(&actions));
                } else {
                    /* Distributed router. */
                    ds_clear(&match);
                    ds_put_format(&match, "ip && ip4.src == %s"
                                          " && outport == %s",
                                  nat->logical_ip,
                                  od->l3dgw_port->json_key);
                    if (!distributed && od->l3redirect_port) {
                        /* Flows for NAT rules that are centralized are only
                         * programmed on the "redirect-chassis". */
                        ds_put_format(&match, " && is_chassis_resident(%s)",
                                      od->l3redirect_port->json_key);
                    }
                    ds_clear(&actions);
                    if (distributed) {
                        ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
                                      ETH_ADDR_ARGS(mac));
                    }
                    ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);

                    /* The priority here is calculated such that the
                     * nat->logical_ip with the longest mask gets a higher
                     * priority. */
                    ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
                                  count_1bits(ntohl(mask)) + 1,
                                  ds_cstr(&match), ds_cstr(&actions));
                }
            }

            /* Logical router ingress table 0:
             * For NAT on a distributed router, add rules allowing
             * ingress traffic with eth.dst matching nat->external_mac
             * on the l3dgw_port instance where nat->logical_port is
             * resident. */
            if (distributed) {
                ds_clear(&match);
                ds_put_format(&match,
                              "eth.dst == "ETH_ADDR_FMT" && inport == %s"
                              " && is_chassis_resident(\"%s\")",
                              ETH_ADDR_ARGS(mac),
                              od->l3dgw_port->json_key,
                              nat->logical_port);
                ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
                              ds_cstr(&match), "next;");
            }

            /* Ingress Gateway Redirect Table: For NAT on a distributed
             * router, add flows that are specific to a NAT rule.  These
             * flows indicate the presence of an applicable NAT rule that
             * can be applied in a distributed manner. */
            if (distributed) {
                ds_clear(&match);
                ds_put_format(&match, "ip4.src == %s && outport == %s",
                              nat->logical_ip,
                              od->l3dgw_port->json_key);
                ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
                              ds_cstr(&match), "next;");
            }

            /* Egress Loopback table: For NAT on a distributed router.
             * If packets in the egress pipeline on the distributed
             * gateway port have ip.dst matching a NAT external IP, then
             * loop a clone of the packet back to the beginning of the
             * ingress pipeline with inport = outport. */
            if (od->l3dgw_port) {
                /* Distributed router. */
                ds_clear(&match);
                ds_put_format(&match, "ip4.dst == %s && outport == %s",
                              nat->external_ip,
                              od->l3dgw_port->json_key);
                ds_clear(&actions);
                ds_put_format(&actions,
                              "clone { ct_clear; "
                              "inport = outport; outport = \"\"; "
                              "flags = 0; flags.loopback = 1; ");
                /* Zero all logical registers so the cloned packet starts
                 * the ingress pipeline with a clean slate. */
                for (int j = 0; j < MFF_N_LOG_REGS; j++) {
                    ds_put_format(&actions, "reg%d = 0; ", j);
                }
                ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
                              "next(pipeline=ingress, table=0); };");
                ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
                              ds_cstr(&match), ds_cstr(&actions));
            }
        }

        /* Handle force SNAT options set in the gateway router. */
        if (dnat_force_snat_ip && !od->l3dgw_port) {
            /* If a packet with destination IP address as that of the
             * gateway router (as set in options:dnat_force_snat_ip) is seen,
             * UNSNAT it. */
            ds_clear(&match);
            ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
                          ds_cstr(&match), "ct_snat;");

            /* Higher priority rules to force SNAT with the IP addresses
             * configured in the Gateway router.  This only takes effect
             * when the packet has already been DNATed once. */
            ds_clear(&match);
            ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
            ds_clear(&actions);
            ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
                          ds_cstr(&match), ds_cstr(&actions));
        }
        if (lb_force_snat_ip && !od->l3dgw_port) {
            /* If a packet with destination IP address as that of the
             * gateway router (as set in options:lb_force_snat_ip) is seen,
             * UNSNAT it. */
            ds_clear(&match);
            ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
                          ds_cstr(&match), "ct_snat;");

            /* Load balanced traffic will have flags.force_snat_for_lb set.
             * Force SNAT it. */
            ds_clear(&match);
            ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
            ds_clear(&actions);
            ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
            ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
                          ds_cstr(&match), ds_cstr(&actions));
        }

        if (!od->l3dgw_port) {
            /* For gateway router, re-circulate every packet through
             * the DNAT zone.  This helps with the following.
             *
             * Any packet that needs to be unDNATed in the reverse
             * direction gets unDNATed.  Ideally this could be done in
             * the egress pipeline.  But since the gateway router
             * does not have any feature that depends on the source
             * ip address being external IP address for IP routing,
             * we can do it here, saving a future re-circulation. */
            ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
                          "ip", "flags.loopback = 1; ct_dnat;");
        } else {
            /* For NAT on a distributed router, add flows to Ingress
             * IP Routing table, Ingress ARP Resolution table, and
             * Ingress Gateway Redirect Table that are not specific to a
             * NAT rule. */

            /* The highest priority IN_IP_ROUTING rule matches packets
             * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
             * with action "ip.ttl--; next;".  The IN_GW_REDIRECT table
             * will take care of setting the outport. */
            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
                          REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");

            /* The highest priority IN_ARP_RESOLVE rule matches packets
             * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
             * then sets eth.dst to the distributed gateway port's
             * ethernet address. */
            ds_clear(&actions);
            ds_put_format(&actions, "eth.dst = %s; next;",
                          od->l3dgw_port->lrp_networks.ea_s);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
                          REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));

            /* The highest priority IN_GW_REDIRECT rule redirects packets
             * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
             * the central instance of the l3dgw_port for NAT processing. */
            ds_clear(&actions);
            ds_put_format(&actions, "outport = %s; next;",
                          od->l3redirect_port->json_key);
            ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
                          REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
        }

        /* Load balancing and packet defrag are only valid on
         * Gateway routers or router with gateway port. */
        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
            continue;
        }

        /* A set to hold all ips that need defragmentation and tracking. */
        struct sset all_ips = SSET_INITIALIZER(&all_ips);

        for (int i = 0; i < od->nbr->n_load_balancer; i++) {
            struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
            struct smap *vips = &lb->vips;
            struct smap_node *node;

            SMAP_FOR_EACH (node, vips) {
                uint16_t port = 0;
                int addr_family;

                /* node->key contains IP:port or just IP. */
                char *ip_address = NULL;
                ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
                                                &addr_family);
                if (!ip_address) {
                    continue;
                }

                /* Emit the defrag flow only once per unique VIP address
                 * (the same VIP may appear with multiple ports). */
                if (!sset_contains(&all_ips, ip_address)) {
                    sset_add(&all_ips, ip_address);
                    /* If there are any load balancing rules, we should send
                     * the packet to conntrack for defragmentation and
                     * tracking.  This helps with two things.
                     *
                     * 1. With tracking, we can send only new connections to
                     *    pick a DNAT ip address from a group.
                     * 2. If there are L4 ports in load balancing rules, we
                     *    need the defragmentation to match on L4 ports. */
                    ds_clear(&match);
                    if (addr_family == AF_INET) {
                        ds_put_format(&match, "ip && ip4.dst == %s",
                                      ip_address);
                    } else {
                        ds_put_format(&match, "ip && ip6.dst == %s",
                                      ip_address);
                    }
                    ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
                                  100, ds_cstr(&match), "ct_next;");
                }

                /* Higher priority rules are added for load-balancing in DNAT
                 * table.  For every match (on a VIP[:port]), we add two flows
                 * via add_router_lb_flow().  One flow is for specific matching
                 * on ct.new with an action of "ct_lb($targets);".  The other
                 * flow is for ct.est with an action of "ct_dnat;". */
                ds_clear(&actions);
                ds_put_format(&actions, "ct_lb(%s);", node->value);

                ds_clear(&match);
                if (addr_family == AF_INET) {
                    ds_put_format(&match, "ip && ip4.dst == %s",
                                  ip_address);
                } else {
                    ds_put_format(&match, "ip && ip6.dst == %s",
                                  ip_address);
                }
                free(ip_address);

                /* VIPs that include an L4 port get a more specific match
                 * and therefore a higher priority (120 vs 110). */
                int prio = 110;
                bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ?
                    true : false;
                if (port) {
                    if (is_udp) {
                        ds_put_format(&match, " && udp && udp.dst == %d",
                                      port);
                    } else {
                        ds_put_format(&match, " && tcp && tcp.dst == %d",
                                      port);
                    }
                    prio = 120;
                }

                if (od->l3redirect_port) {
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  od->l3redirect_port->json_key);
                }
                add_router_lb_flow(lflows, od, &match, &actions, prio,
                                   lb_force_snat_ip, node->value, is_udp,
                                   addr_family);
            }
        }
        sset_destroy(&all_ips);
    }
5635
5636 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
5637 * response. */
5638 HMAP_FOR_EACH (op, key_node, ports) {
5639 if (!op->nbrp || op->nbrp->peer || !op->peer) {
5640 continue;
5641 }
5642
5643 if (!op->lrp_networks.n_ipv6_addrs) {
5644 continue;
5645 }
5646
5647 const char *address_mode = smap_get(
5648 &op->nbrp->ipv6_ra_configs, "address_mode");
5649
5650 if (!address_mode) {
5651 continue;
5652 }
5653 if (strcmp(address_mode, "slaac") &&
5654 strcmp(address_mode, "dhcpv6_stateful") &&
5655 strcmp(address_mode, "dhcpv6_stateless")) {
5656 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
5657 VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
5658 address_mode);
5659 continue;
5660 }
5661
5662 if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
5663 false)) {
5664 copy_ra_to_sb(op, address_mode);
5665 }
5666
5667 ds_clear(&match);
5668 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
5669 op->json_key);
5670 ds_clear(&actions);
5671
5672 const char *mtu_s = smap_get(
5673 &op->nbrp->ipv6_ra_configs, "mtu");
5674
5675 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
5676 uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
5677
5678 ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
5679 "addr_mode = \"%s\", slla = %s",
5680 address_mode, op->lrp_networks.ea_s);
5681 if (mtu > 0) {
5682 ds_put_format(&actions, ", mtu = %u", mtu);
5683 }
5684
5685 bool add_rs_response_flow = false;
5686
5687 for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5688 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5689 continue;
5690 }
5691
5692 /* Add the prefix option if the address mode is slaac or
5693 * dhcpv6_stateless. */
5694 if (strcmp(address_mode, "dhcpv6_stateful")) {
5695 ds_put_format(&actions, ", prefix = %s/%u",
5696 op->lrp_networks.ipv6_addrs[i].network_s,
5697 op->lrp_networks.ipv6_addrs[i].plen);
5698 }
5699 add_rs_response_flow = true;
5700 }
5701
5702 if (add_rs_response_flow) {
5703 ds_put_cstr(&actions, "); next;");
5704 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50,
5705 ds_cstr(&match), ds_cstr(&actions));
5706 ds_clear(&actions);
5707 ds_clear(&match);
5708 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && "
5709 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
5710
5711 char ip6_str[INET6_ADDRSTRLEN + 1];
5712 struct in6_addr lla;
5713 in6_generate_lla(op->lrp_networks.ea, &lla);
5714 memset(ip6_str, 0, sizeof(ip6_str));
5715 ipv6_string_mapped(ip6_str, &lla);
5716 ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; "
5717 "ip6.dst = ip6.src; ip6.src = %s; "
5718 "outport = inport; flags.loopback = 1; "
5719 "output;",
5720 op->lrp_networks.ea_s, ip6_str);
5721 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50,
5722 ds_cstr(&match), ds_cstr(&actions));
5723 }
5724 }
5725
5726 /* Logical router ingress table 5, 6: RS responder, by default goto next.
5727 * (priority 0)*/
5728 HMAP_FOR_EACH (od, key_node, datapaths) {
5729 if (!od->nbr) {
5730 continue;
5731 }
5732
5733 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
5734 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
5735 }
5736
5737 /* Logical router ingress table 7: IP Routing.
5738 *
5739 * A packet that arrives at this table is an IP packet that should be
5740 * routed to the address in 'ip[46].dst'. This table sets outport to
5741 * the correct output port, eth.src to the output port's MAC
5742 * address, and '[xx]reg0' to the next-hop IP address (leaving
5743 * 'ip[46].dst', the packet’s final destination, unchanged), and
5744 * advances to the next table for ARP/ND resolution. */
5745 HMAP_FOR_EACH (op, key_node, ports) {
5746 if (!op->nbrp) {
5747 continue;
5748 }
5749
5750 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5751 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
5752 op->lrp_networks.ipv4_addrs[i].network_s,
5753 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
5754 }
5755
5756 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5757 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
5758 op->lrp_networks.ipv6_addrs[i].network_s,
5759 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
5760 }
5761 }
5762
5763 /* Convert the static routes to flows. */
5764 HMAP_FOR_EACH (od, key_node, datapaths) {
5765 if (!od->nbr) {
5766 continue;
5767 }
5768
5769 for (int i = 0; i < od->nbr->n_static_routes; i++) {
5770 const struct nbrec_logical_router_static_route *route;
5771
5772 route = od->nbr->static_routes[i];
5773 build_static_route_flow(lflows, od, ports, route);
5774 }
5775 }
5776
5777 /* XXX destination unreachable */
5778
5779 /* Local router ingress table 8: ARP Resolution.
5780 *
5781 * Any packet that reaches this table is an IP packet whose next-hop IP
5782 * address is in reg0. (ip4.dst is the final destination.) This table
5783 * resolves the IP address in reg0 into an output port in outport and an
5784 * Ethernet address in eth.dst. */
5785 HMAP_FOR_EACH (op, key_node, ports) {
5786 if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
5787 continue;
5788 }
5789
5790 if (op->nbrp) {
5791 /* This is a logical router port. If next-hop IP address in
5792 * '[xx]reg0' matches IP address of this router port, then
5793 * the packet is intended to eventually be sent to this
5794 * logical port. Set the destination mac address using this
5795 * port's mac address.
5796 *
5797 * The packet is still in peer's logical pipeline. So the match
5798 * should be on peer's outport. */
5799 if (op->peer && op->nbrp->peer) {
5800 if (op->lrp_networks.n_ipv4_addrs) {
5801 ds_clear(&match);
5802 ds_put_format(&match, "outport == %s && reg0 == ",
5803 op->peer->json_key);
5804 op_put_v4_networks(&match, op, false);
5805
5806 ds_clear(&actions);
5807 ds_put_format(&actions, "eth.dst = %s; next;",
5808 op->lrp_networks.ea_s);
5809 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
5810 100, ds_cstr(&match), ds_cstr(&actions));
5811 }
5812
5813 if (op->lrp_networks.n_ipv6_addrs) {
5814 ds_clear(&match);
5815 ds_put_format(&match, "outport == %s && xxreg0 == ",
5816 op->peer->json_key);
5817 op_put_v6_networks(&match, op);
5818
5819 ds_clear(&actions);
5820 ds_put_format(&actions, "eth.dst = %s; next;",
5821 op->lrp_networks.ea_s);
5822 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
5823 100, ds_cstr(&match), ds_cstr(&actions));
5824 }
5825 }
5826 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
5827 /* This is a logical switch port that backs a VM or a container.
5828 * Extract its addresses. For each of the address, go through all
5829 * the router ports attached to the switch (to which this port
5830 * connects) and if the address in question is reachable from the
5831 * router port, add an ARP/ND entry in that router's pipeline. */
5832
5833 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
5834 const char *ea_s = op->lsp_addrs[i].ea_s;
5835 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
5836 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
5837 for (size_t k = 0; k < op->od->n_router_ports; k++) {
5838 /* Get the Logical_Router_Port that the
5839 * Logical_Switch_Port is connected to, as
5840 * 'peer'. */
5841 const char *peer_name = smap_get(
5842 &op->od->router_ports[k]->nbsp->options,
5843 "router-port");
5844 if (!peer_name) {
5845 continue;
5846 }
5847
5848 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5849 if (!peer || !peer->nbrp) {
5850 continue;
5851 }
5852
5853 if (!find_lrp_member_ip(peer, ip_s)) {
5854 continue;
5855 }
5856
5857 ds_clear(&match);
5858 ds_put_format(&match, "outport == %s && reg0 == %s",
5859 peer->json_key, ip_s);
5860
5861 ds_clear(&actions);
5862 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
5863 ovn_lflow_add(lflows, peer->od,
5864 S_ROUTER_IN_ARP_RESOLVE, 100,
5865 ds_cstr(&match), ds_cstr(&actions));
5866 }
5867 }
5868
5869 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
5870 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
5871 for (size_t k = 0; k < op->od->n_router_ports; k++) {
5872 /* Get the Logical_Router_Port that the
5873 * Logical_Switch_Port is connected to, as
5874 * 'peer'. */
5875 const char *peer_name = smap_get(
5876 &op->od->router_ports[k]->nbsp->options,
5877 "router-port");
5878 if (!peer_name) {
5879 continue;
5880 }
5881
5882 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5883 if (!peer || !peer->nbrp) {
5884 continue;
5885 }
5886
5887 if (!find_lrp_member_ip(peer, ip_s)) {
5888 continue;
5889 }
5890
5891 ds_clear(&match);
5892 ds_put_format(&match, "outport == %s && xxreg0 == %s",
5893 peer->json_key, ip_s);
5894
5895 ds_clear(&actions);
5896 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
5897 ovn_lflow_add(lflows, peer->od,
5898 S_ROUTER_IN_ARP_RESOLVE, 100,
5899 ds_cstr(&match), ds_cstr(&actions));
5900 }
5901 }
5902 }
5903 } else if (!strcmp(op->nbsp->type, "router")) {
5904 /* This is a logical switch port that connects to a router. */
5905
5906 /* The peer of this switch port is the router port for which
5907 * we need to add logical flows such that it can resolve
5908 * ARP entries for all the other router ports connected to
5909 * the switch in question. */
5910
5911 const char *peer_name = smap_get(&op->nbsp->options,
5912 "router-port");
5913 if (!peer_name) {
5914 continue;
5915 }
5916
5917 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5918 if (!peer || !peer->nbrp) {
5919 continue;
5920 }
5921
5922 for (size_t i = 0; i < op->od->n_router_ports; i++) {
5923 const char *router_port_name = smap_get(
5924 &op->od->router_ports[i]->nbsp->options,
5925 "router-port");
5926 struct ovn_port *router_port = ovn_port_find(ports,
5927 router_port_name);
5928 if (!router_port || !router_port->nbrp) {
5929 continue;
5930 }
5931
5932 /* Skip the router port under consideration. */
5933 if (router_port == peer) {
5934 continue;
5935 }
5936
5937 if (router_port->lrp_networks.n_ipv4_addrs) {
5938 ds_clear(&match);
5939 ds_put_format(&match, "outport == %s && reg0 == ",
5940 peer->json_key);
5941 op_put_v4_networks(&match, router_port, false);
5942
5943 ds_clear(&actions);
5944 ds_put_format(&actions, "eth.dst = %s; next;",
5945 router_port->lrp_networks.ea_s);
5946 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5947 100, ds_cstr(&match), ds_cstr(&actions));
5948 }
5949
5950 if (router_port->lrp_networks.n_ipv6_addrs) {
5951 ds_clear(&match);
5952 ds_put_format(&match, "outport == %s && xxreg0 == ",
5953 peer->json_key);
5954 op_put_v6_networks(&match, router_port);
5955
5956 ds_clear(&actions);
5957 ds_put_format(&actions, "eth.dst = %s; next;",
5958 router_port->lrp_networks.ea_s);
5959 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5960 100, ds_cstr(&match), ds_cstr(&actions));
5961 }
5962 }
5963 }
5964 }
5965
5966 HMAP_FOR_EACH (od, key_node, datapaths) {
5967 if (!od->nbr) {
5968 continue;
5969 }
5970
5971 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
5972 "get_arp(outport, reg0); next;");
5973
5974 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
5975 "get_nd(outport, xxreg0); next;");
5976 }
5977
5978 /* Logical router ingress table 9: Gateway redirect.
5979 *
5980 * For traffic with outport equal to the l3dgw_port
5981 * on a distributed router, this table redirects a subset
5982 * of the traffic to the l3redirect_port which represents
5983 * the central instance of the l3dgw_port.
5984 */
5985 HMAP_FOR_EACH (od, key_node, datapaths) {
5986 if (!od->nbr) {
5987 continue;
5988 }
5989 if (od->l3dgw_port && od->l3redirect_port) {
5990 /* For traffic with outport == l3dgw_port, if the
5991 * packet did not match any higher priority redirect
5992 * rule, then the traffic is redirected to the central
5993 * instance of the l3dgw_port. */
5994 ds_clear(&match);
5995 ds_put_format(&match, "outport == %s",
5996 od->l3dgw_port->json_key);
5997 ds_clear(&actions);
5998 ds_put_format(&actions, "outport = %s; next;",
5999 od->l3redirect_port->json_key);
6000 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
6001 ds_cstr(&match), ds_cstr(&actions));
6002
6003 /* If the Ethernet destination has not been resolved,
6004 * redirect to the central instance of the l3dgw_port.
6005 * Such traffic will be replaced by an ARP request or ND
6006 * Neighbor Solicitation in the ARP request ingress
6007 * table, before being redirected to the central instance.
6008 */
6009 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
6010 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
6011 ds_cstr(&match), ds_cstr(&actions));
6012 }
6013
6014 /* Packets are allowed by default. */
6015 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
6016 }
6017
6018 /* Local router ingress table 10: ARP request.
6019 *
6020 * In the common case where the Ethernet destination has been resolved,
6021 * this table outputs the packet (priority 0). Otherwise, it composes
6022 * and sends an ARP/IPv6 NA request (priority 100). */
6023 HMAP_FOR_EACH (od, key_node, datapaths) {
6024 if (!od->nbr) {
6025 continue;
6026 }
6027
6028 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6029 "eth.dst == 00:00:00:00:00:00",
6030 "arp { "
6031 "eth.dst = ff:ff:ff:ff:ff:ff; "
6032 "arp.spa = reg1; "
6033 "arp.tpa = reg0; "
6034 "arp.op = 1; " /* ARP request */
6035 "output; "
6036 "};");
6037 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6038 "eth.dst == 00:00:00:00:00:00",
6039 "nd_ns { "
6040 "nd.target = xxreg0; "
6041 "output; "
6042 "};");
6043 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
6044 }
6045
6046 /* Logical router egress table 1: Delivery (priority 100).
6047 *
6048 * Priority 100 rules deliver packets to enabled logical ports. */
6049 HMAP_FOR_EACH (op, key_node, ports) {
6050 if (!op->nbrp) {
6051 continue;
6052 }
6053
6054 if (!lrport_is_enabled(op->nbrp)) {
6055 /* Drop packets to disabled logical ports (since logical flow
6056 * tables are default-drop). */
6057 continue;
6058 }
6059
6060 if (op->derived) {
6061 /* No egress packets should be processed in the context of
6062 * a chassisredirect port. The chassisredirect port should
6063 * be replaced by the l3dgw port in the local output
6064 * pipeline stage before egress processing. */
6065 continue;
6066 }
6067
6068 ds_clear(&match);
6069 ds_put_format(&match, "outport == %s", op->json_key);
6070 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
6071 ds_cstr(&match), "output;");
6072 }
6073
6074 ds_destroy(&match);
6075 ds_destroy(&actions);
6076 }
6077
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    /* Compute the desired set of logical flows and multicast groups from
     * the northbound contents. */
    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database.
     *
     * First pass: for each row already in the southbound Logical_Flow
     * table, either pair it with a desired flow (and drop that flow from
     * 'lflows', since the row needs no update) or delete the row because
     * it is no longer wanted. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            /* Datapath no longer exists; its flows go with it. */
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions, sbflow->hash);
        if (lflow) {
            /* Matching desired flow exists: keep the row, and remove the
             * flow from 'lflows' so only genuinely new flows remain. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }
    /* Second pass: insert a southbound row for each flow still in
     * 'lflows' (those had no matching row above). */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(sbflow, pipeline);
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Trim the source locator lflow->where, which looks something like
         * "ovn/northd/ovn-northd.c:1234", down to just the part following the
         * last slash, e.g. "ovn-northd.c:1234". */
        const char *slash = strrchr(lflow->where, '/');
#if _WIN32
        const char *backslash = strrchr(lflow->where, '\\');
        if (!slash || backslash > slash) {
            slash = backslash;
        }
#endif
        const char *where = slash ? slash + 1 : lflow->where;

        /* Record debugging aids (stage name, source location, optional
         * stage hint) in external_ids. */
        struct smap ids = SMAP_INITIALIZER(&ids);
        smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
        smap_add(&ids, "source", where);
        if (lflow->stage_hint) {
            smap_add(&ids, "stage-hint", lflow->stage_hint);
        }
        sbrec_logical_flow_set_external_ids(sbflow, &ids);
        smap_destroy(&ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database, using the
     * same reconcile-then-insert approach as for logical flows above. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Group still wanted: update the existing row in place. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
6181
6182 static void
6183 sync_address_set(struct northd_context *ctx, const char *name,
6184 const char **addrs, size_t n_addrs,
6185 struct shash *sb_address_sets)
6186 {
6187 const struct sbrec_address_set *sb_address_set;
6188 sb_address_set = shash_find_and_delete(sb_address_sets,
6189 name);
6190 if (!sb_address_set) {
6191 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
6192 sbrec_address_set_set_name(sb_address_set, name);
6193 }
6194
6195 sbrec_address_set_set_addresses(sb_address_set,
6196 addrs, n_addrs);
6197 }
6198
6199 /* OVN_Southbound Address_Set table contains same records as in north
6200 * bound, plus the records generated from Port_Group table in north bound.
6201 *
6202 * There are 2 records generated from each port group, one for IPv4, and
6203 * one for IPv6, named in the format: <port group name>_ip4 and
6204 * <port group name>_ip6 respectively. MAC addresses are ignored.
6205 *
6206 * We always update OVN_Southbound to match the Address_Set and Port_Group
6207 * in OVN_Northbound, so that the address sets used in Logical_Flows in
6208 * OVN_Southbound is checked against the proper set.*/
6209 static void
6210 sync_address_sets(struct northd_context *ctx)
6211 {
6212 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
6213
6214 const struct sbrec_address_set *sb_address_set;
6215 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
6216 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
6217 }
6218
6219 /* sync port group generated address sets first */
6220 const struct nbrec_port_group *nb_port_group;
6221 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6222 char **ipv4_addrs = xcalloc(1, sizeof *ipv4_addrs);
6223 size_t n_ipv4_addrs = 0;
6224 size_t n_ipv4_addrs_buf = 1;
6225 char **ipv6_addrs = xcalloc(1, sizeof *ipv6_addrs);
6226 size_t n_ipv6_addrs = 0;
6227 size_t n_ipv6_addrs_buf = 1;
6228 for (size_t i = 0; i < nb_port_group->n_ports; i++) {
6229 for (size_t j = 0; j < nb_port_group->ports[i]->n_addresses; j++) {
6230 struct lport_addresses laddrs;
6231 extract_lsp_addresses(nb_port_group->ports[i]->addresses[j],
6232 &laddrs);
6233 while (n_ipv4_addrs_buf < n_ipv4_addrs + laddrs.n_ipv4_addrs) {
6234 n_ipv4_addrs_buf *= 2;
6235 ipv4_addrs = xrealloc(ipv4_addrs,
6236 n_ipv4_addrs_buf * sizeof *ipv4_addrs);
6237 }
6238 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
6239 ipv4_addrs[n_ipv4_addrs++] =
6240 xstrdup(laddrs.ipv4_addrs[k].addr_s);
6241 }
6242 while (n_ipv6_addrs_buf < n_ipv6_addrs + laddrs.n_ipv6_addrs) {
6243 n_ipv6_addrs_buf *= 2;
6244 ipv6_addrs = xrealloc(ipv6_addrs,
6245 n_ipv6_addrs_buf * sizeof *ipv6_addrs);
6246 }
6247 for (size_t k = 0; k < laddrs.n_ipv6_addrs; k++) {
6248 ipv6_addrs[n_ipv6_addrs++] =
6249 xstrdup(laddrs.ipv6_addrs[k].addr_s);
6250 }
6251 destroy_lport_addresses(&laddrs);
6252 }
6253 }
6254 char *ipv4_addrs_name = xasprintf("%s_ip4", nb_port_group->name);
6255 char *ipv6_addrs_name = xasprintf("%s_ip6", nb_port_group->name);
6256 sync_address_set(ctx, ipv4_addrs_name, (const char **)ipv4_addrs,
6257 n_ipv4_addrs, &sb_address_sets);
6258 sync_address_set(ctx, ipv6_addrs_name, (const char **)ipv6_addrs,
6259 n_ipv6_addrs, &sb_address_sets);
6260 free(ipv4_addrs_name);
6261 free(ipv6_addrs_name);
6262 for (size_t i = 0; i < n_ipv4_addrs; i++) {
6263 free(ipv4_addrs[i]);
6264 }
6265 free(ipv4_addrs);
6266 for (size_t i = 0; i < n_ipv6_addrs; i++) {
6267 free(ipv6_addrs[i]);
6268 }
6269 free(ipv6_addrs);
6270 }
6271
6272 /* sync user defined address sets, which may overwrite port group
6273 * generated address sets if same name is used */
6274 const struct nbrec_address_set *nb_address_set;
6275 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
6276 sync_address_set(ctx, nb_address_set->name,
6277 /* "char **" is not compatible with "const char **" */
6278 (const char **)nb_address_set->addresses,
6279 nb_address_set->n_addresses, &sb_address_sets);
6280 }
6281
6282 struct shash_node *node, *next;
6283 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
6284 sbrec_address_set_delete(node->data);
6285 shash_delete(&sb_address_sets, node);
6286 }
6287 shash_destroy(&sb_address_sets);
6288 }
6289
6290 /* Each port group in Port_Group table in OVN_Northbound has a corresponding
6291 * entry in Port_Group table in OVN_Southbound. In OVN_Northbound the entries
6292 * contains lport uuids, while in OVN_Southbound we store the lport names.
6293 */
6294 static void
6295 sync_port_groups(struct northd_context *ctx)
6296 {
6297 struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups);
6298
6299 const struct sbrec_port_group *sb_port_group;
6300 SBREC_PORT_GROUP_FOR_EACH (sb_port_group, ctx->ovnsb_idl) {
6301 shash_add(&sb_port_groups, sb_port_group->name, sb_port_group);
6302 }
6303
6304 const struct nbrec_port_group *nb_port_group;
6305 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6306 sb_port_group = shash_find_and_delete(&sb_port_groups,
6307 nb_port_group->name);
6308 if (!sb_port_group) {
6309 sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn);
6310 sbrec_port_group_set_name(sb_port_group, nb_port_group->name);
6311 }
6312
6313 const char **nb_port_names = xcalloc(nb_port_group->n_ports,
6314 sizeof *nb_port_names);
6315 int i;
6316 for (i = 0; i < nb_port_group->n_ports; i++) {
6317 nb_port_names[i] = nb_port_group->ports[i]->name;
6318 }
6319 sbrec_port_group_set_ports(sb_port_group,
6320 nb_port_names,
6321 nb_port_group->n_ports);
6322 free(nb_port_names);
6323 }
6324
6325 struct shash_node *node, *next;
6326 SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) {
6327 sbrec_port_group_delete(node->data);
6328 shash_delete(&sb_port_groups, node);
6329 }
6330 shash_destroy(&sb_port_groups);
6331 }
6332
/*
 * struct 'dns_info' is used to sync the DNS records between the OVN
 * Northbound db and the Southbound db.
 */
struct dns_info {
    struct hmap_node hmap_node;     /* In a map hashed on the northbound
                                     * record's uuid. */
    const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns; /* DNS record in the Southbound db. */

    /* Datapaths with which the DNS entry is associated. */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;                   /* Number of elements in 'sbs'. */
};
6346
6347 static inline struct dns_info *
6348 get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
6349 {
6350 struct dns_info *dns_info;
6351 size_t hash = uuid_hash(uuid);
6352 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
6353 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
6354 return dns_info;
6355 }
6356 }
6357
6358 return NULL;
6359 }
6360
6361 static void
6362 sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
6363 {
6364 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
6365 struct ovn_datapath *od;
6366 HMAP_FOR_EACH (od, key_node, datapaths) {
6367 if (!od->nbs || !od->nbs->n_dns_records) {
6368 continue;
6369 }
6370
6371 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
6372 struct dns_info *dns_info = get_dns_info_from_hmap(
6373 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
6374 if (!dns_info) {
6375 size_t hash = uuid_hash(
6376 &od->nbs->dns_records[i]->header_.uuid);
6377 dns_info = xzalloc(sizeof *dns_info);;
6378 dns_info->nb_dns = od->nbs->dns_records[i];
6379 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
6380 }
6381
6382 dns_info->n_sbs++;
6383 dns_info->sbs = xrealloc(dns_info->sbs,
6384 dns_info->n_sbs * sizeof *dns_info->sbs);
6385 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
6386 }
6387 }
6388
6389 const struct sbrec_dns *sbrec_dns, *next;
6390 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
6391 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
6392 struct uuid dns_uuid;
6393 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
6394 sbrec_dns_delete(sbrec_dns);
6395 continue;
6396 }
6397
6398 struct dns_info *dns_info =
6399 get_dns_info_from_hmap(&dns_map, &dns_uuid);
6400 if (dns_info) {
6401 dns_info->sb_dns = sbrec_dns;
6402 } else {
6403 sbrec_dns_delete(sbrec_dns);
6404 }
6405 }
6406
6407 struct dns_info *dns_info;
6408 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
6409 if (!dns_info->sb_dns) {
6410 sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
6411 dns_info->sb_dns = sbrec_dns;
6412 char *dns_id = xasprintf(
6413 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
6414 const struct smap external_ids =
6415 SMAP_CONST1(&external_ids, "dns_id", dns_id);
6416 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
6417 free(dns_id);
6418 }
6419
6420 /* Set the datapaths and records. If nothing has changed, then
6421 * this will be a no-op.
6422 */
6423 sbrec_dns_set_datapaths(
6424 dns_info->sb_dns,
6425 (struct sbrec_datapath_binding **)dns_info->sbs,
6426 dns_info->n_sbs);
6427 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
6428 free(dns_info->sbs);
6429 free(dns_info);
6430 }
6431 hmap_destroy(&dns_map);
6432 }
6433
6434 \f
/* Recomputes the southbound database contents from the northbound database:
 * datapaths, ports, IPAM, logical flows, address sets, port groups and DNS
 * entries.  Also copies nb_cfg from northbound to southbound.  Requires open
 * transactions on both databases; otherwise does nothing. */
static void
ovnnb_db_run(struct northd_context *ctx, struct chassis_index *chassis_index,
             struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }
    /* Build the transient indexes used by the flow computation, then the
     * flows themselves. */
    struct hmap datapaths, ports;
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, &datapaths, chassis_index, &ports);
    build_ipam(&datapaths, &ports);
    build_lflows(ctx, &datapaths, &ports);

    sync_address_sets(ctx);
    sync_port_groups(ctx);
    sync_dns_entries(ctx, &datapaths);

    /* Tear down the transient datapath and port indexes. */
    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        /* Create the NB_Global row if it does not yet exist. */
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    cleanup_macam(&macam);
}
6480
6481 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
6482 * this column is not empty, it means we need to set the corresponding logical
6483 * port as 'up' in the northbound DB. */
6484 static void
6485 update_logical_port_status(struct northd_context *ctx)
6486 {
6487 struct hmap lports_hmap;
6488 const struct sbrec_port_binding *sb;
6489 const struct nbrec_logical_switch_port *nbsp;
6490
6491 struct lport_hash_node {
6492 struct hmap_node node;
6493 const struct nbrec_logical_switch_port *nbsp;
6494 } *hash_node;
6495
6496 hmap_init(&lports_hmap);
6497
6498 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
6499 hash_node = xzalloc(sizeof *hash_node);
6500 hash_node->nbsp = nbsp;
6501 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
6502 }
6503
6504 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
6505 nbsp = NULL;
6506 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
6507 hash_string(sb->logical_port, 0),
6508 &lports_hmap) {
6509 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
6510 nbsp = hash_node->nbsp;
6511 break;
6512 }
6513 }
6514
6515 if (!nbsp) {
6516 /* The logical port doesn't exist for this port binding. This can
6517 * happen under normal circumstances when ovn-northd hasn't gotten
6518 * around to pruning the Port_Binding yet. */
6519 continue;
6520 }
6521
6522 bool up = (sb->chassis || !strcmp(nbsp->type, "router"));
6523 if (!nbsp->up || *nbsp->up != up) {
6524 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
6525 }
6526 }
6527
6528 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
6529 free(hash_node);
6530 }
6531 hmap_destroy(&lports_hmap);
6532 }
6533
/* The DHCPv4 options that ovn-northd supports; mirrored into the southbound
 * DHCP_Options table by check_and_add_supported_dhcp_opts_to_sb_db(). */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
6560
/* The DHCPv6 options that ovn-northd supports; mirrored into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db(). */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
6567
6568 static void
6569 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
6570 {
6571 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
6572 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
6573 sizeof(supported_dhcp_opts[0])); i++) {
6574 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
6575 dhcp_opt_hash(supported_dhcp_opts[i].name));
6576 }
6577
6578 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
6579 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
6580 struct gen_opts_map *dhcp_opt =
6581 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
6582 if (dhcp_opt) {
6583 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
6584 } else {
6585 sbrec_dhcp_options_delete(opt_row);
6586 }
6587 }
6588
6589 struct gen_opts_map *opt;
6590 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
6591 struct sbrec_dhcp_options *sbrec_dhcp_option =
6592 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
6593 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
6594 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
6595 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
6596 }
6597
6598 hmap_destroy(&dhcp_opts_to_add);
6599 }
6600
6601 static void
6602 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
6603 {
6604 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
6605 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
6606 sizeof(supported_dhcpv6_opts[0])); i++) {
6607 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
6608 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
6609 }
6610
6611 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
6612 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
6613 struct gen_opts_map *dhcp_opt =
6614 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
6615 if (dhcp_opt) {
6616 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
6617 } else {
6618 sbrec_dhcpv6_options_delete(opt_row);
6619 }
6620 }
6621
6622 struct gen_opts_map *opt;
6623 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
6624 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
6625 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
6626 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
6627 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
6628 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
6629 }
6630
6631 hmap_destroy(&dhcpv6_opts_to_add);
6632 }
6633
/* Per-table RBAC policy for the "ovn-controller" role.  For each southbound
 * table there is an "auth" list (the column whose value must match the
 * client's certificate CN to authorize a write; "" means no authorization
 * column) and an "update" list (the columns the client may modify). */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};
6653
/* Expected state of each southbound RBAC_Permission row, one entry per
 * table, terminated by a NULL 'table'.  check_and_update_rbac() deletes
 * database rows that do not match an entry and creates rows for entries
 * whose 'row' pointer was not filled in by ovn_rbac_validate_perm(). */
static struct rbac_perm_cfg {
    const char *table;         /* Southbound table the permission covers. */
    const char **auth;         /* Authorization columns. */
    int n_auth;                /* Number of entries in 'auth'. */
    bool insdel;               /* May the role insert/delete rows? */
    const char **update;       /* Columns the role may modify. */
    int n_update;              /* Number of entries in 'update'. */
    /* Matching database row, filled in by ovn_rbac_validate_perm();
     * NULL if no valid row exists yet. */
    const struct sbrec_rbac_permission *row;
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        /* Sentinel terminating the table. */
        .table = NULL,
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
6705
6706 static bool
6707 ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
6708 {
6709 struct rbac_perm_cfg *pcfg;
6710 int i, j, n_found;
6711
6712 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
6713 if (!strcmp(perm->table, pcfg->table)) {
6714 break;
6715 }
6716 }
6717 if (!pcfg->table) {
6718 return false;
6719 }
6720 if (perm->n_authorization != pcfg->n_auth ||
6721 perm->n_update != pcfg->n_update) {
6722 return false;
6723 }
6724 if (perm->insert_delete != pcfg->insdel) {
6725 return false;
6726 }
6727 /* verify perm->authorization vs. pcfg->auth */
6728 n_found = 0;
6729 for (i = 0; i < pcfg->n_auth; i++) {
6730 for (j = 0; j < perm->n_authorization; j++) {
6731 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
6732 n_found++;
6733 break;
6734 }
6735 }
6736 }
6737 if (n_found != pcfg->n_auth) {
6738 return false;
6739 }
6740
6741 /* verify perm->update vs. pcfg->update */
6742 n_found = 0;
6743 for (i = 0; i < pcfg->n_update; i++) {
6744 for (j = 0; j < perm->n_update; j++) {
6745 if (!strcmp(pcfg->update[i], perm->update[j])) {
6746 n_found++;
6747 break;
6748 }
6749 }
6750 }
6751 if (n_found != pcfg->n_update) {
6752 return false;
6753 }
6754
6755 /* Success, db state matches expected state */
6756 pcfg->row = perm;
6757 return true;
6758 }
6759
/* Inserts a southbound RBAC_Permission row matching the expected
 * configuration 'pcfg' and links it into 'rbac_role''s permissions map
 * under the table name.  Requires an active southbound transaction in
 * ctx->ovnsb_txn. */
static void
ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
                     struct northd_context *ctx,
                     const struct sbrec_rbac_role *rbac_role)
{
    struct sbrec_rbac_permission *rbac_perm;

    rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
    sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
    sbrec_rbac_permission_set_authorization(rbac_perm,
                                            pcfg->auth,
                                            pcfg->n_auth);
    sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
    sbrec_rbac_permission_set_update(rbac_perm,
                                     pcfg->update,
                                     pcfg->n_update);
    /* Key the permission by table name in the role's permissions map. */
    sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
                                              rbac_perm);
}
6779
/* Reconciles the southbound RBAC tables with the expected state in
 * rbac_perm_cfg[]: deletes permission rows and roles that do not match,
 * ensures the "ovn-controller" role exists, and creates any missing
 * permission rows.  Requires an active southbound transaction in
 * ctx->ovnsb_txn. */
static void
check_and_update_rbac(struct northd_context *ctx)
{
    const struct sbrec_rbac_role *rbac_role = NULL;
    const struct sbrec_rbac_permission *perm_row, *perm_next;
    const struct sbrec_rbac_role *role_row, *role_row_next;
    struct rbac_perm_cfg *pcfg;

    /* Forget matches from previous iterations; ovn_rbac_validate_perm()
     * re-fills 'row' for entries that have a valid database row. */
    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        pcfg->row = NULL;
    }

    /* Delete permission rows that do not match the expected state. */
    SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
        if (!ovn_rbac_validate_perm(perm_row)) {
            sbrec_rbac_permission_delete(perm_row);
        }
    }
    /* The only role we manage is "ovn-controller"; delete any other. */
    SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
        if (strcmp(role_row->name, "ovn-controller")) {
            sbrec_rbac_role_delete(role_row);
        } else {
            rbac_role = role_row;
        }
    }

    if (!rbac_role) {
        rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
        sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
    }

    /* Create permission rows that were not found in the database. */
    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        if (!pcfg->row) {
            ovn_rbac_create_perm(pcfg, ctx, rbac_role);
        }
    }
}
6816
6817 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
6818 static void
6819 update_northbound_cfg(struct northd_context *ctx,
6820 struct ovsdb_idl_loop *sb_loop)
6821 {
6822 /* Update northbound sb_cfg if appropriate. */
6823 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
6824 int64_t sb_cfg = sb_loop->cur_cfg;
6825 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
6826 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
6827 }
6828
6829 /* Update northbound hv_cfg if appropriate. */
6830 if (nbg) {
6831 /* Find minimum nb_cfg among all chassis. */
6832 const struct sbrec_chassis *chassis;
6833 int64_t hv_cfg = nbg->nb_cfg;
6834 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
6835 if (chassis->nb_cfg < hv_cfg) {
6836 hv_cfg = chassis->nb_cfg;
6837 }
6838 }
6839
6840 /* Update hv_cfg. */
6841 if (nbg->hv_cfg != hv_cfg) {
6842 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
6843 }
6844 }
6845 }
6846
/* Handle a fairly small set of changes in the southbound database. */
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    /* Nothing to do without a northbound transaction to write results
     * into, or before the southbound IDL has a snapshot to read from. */
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}
6858 \f
/* Parses ovn-northd's command-line options, filling in the file-scope
 * ovnnb_db, ovnsb_db, and unixctl_path variables (falling back to the
 * defaults when the corresponding option is absent).  Exits directly for
 * --help, --options, and --version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        /* Option-value enums contributed by the daemon, vlog, and SSL
         * modules; their handlers are expanded in the switch below. */
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"unixctl", required_argument, NULL, 'u'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'u':
            unixctl_path = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the standard database socket paths when not given. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
6933
/* Registers 'col' with 'idl' while suppressing change alerts for it:
 * ovn-northd is the writer of these columns, so it need not wake up when
 * it observes its own updates. */
static void
add_column_noalert(struct ovsdb_idl *idl, const struct ovsdb_idl_column *col)
{
    ovsdb_idl_add_column(idl, col);
    ovsdb_idl_omit_alert(idl, col);
}
6941
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(unixctl_path, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg and hv_cfg are written by us in update_northbound_cfg(), so
     * changes to them should not wake us up. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db.  Columns
     * registered via add_column_noalert() are ones we write; columns
     * registered via ovsdb_idl_add_column() are ones whose changes (made
     * by ovn-controller) we need to react to. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* 'chassis' is written by ovn-controller when it binds a port, so we
     * do want alerts for it (it drives Logical_Switch_Port 'up'). */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_port_binding_col_gateway_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_priority);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_external_ids);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_options);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_external_ids);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_group);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_table);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_authorization);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_insert_delete);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);

    /* Ensure that only a single ovn-northd is active in the deployment by
     * acquiring a lock called "ovn_northd" on the southbound database
     * and then only performing DB transactions if the lock is held. */
    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
    bool had_lock = false;

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        /* Log active/standby transitions. */
        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock acquired. "
                      "This ovn-northd instance is now active.");
            had_lock = true;
        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock lost. "
                      "This ovn-northd instance is now on standby.");
            had_lock = false;
        }

        struct chassis_index chassis_index;
        bool destroy_chassis_index = false;
        /* Only the lock holder processes the databases. */
        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            chassis_index_init(&chassis_index, ctx.ovnsb_idl);
            destroy_chassis_index = true;

            ovnnb_db_run(&ctx, &chassis_index, &ovnsb_idl_loop);
            ovnsb_db_run(&ctx, &ovnsb_idl_loop);
            if (ctx.ovnsb_txn) {
                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
                check_and_update_rbac(&ctx);
            }
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }

        if (destroy_chassis_index) {
            chassis_index_destroy(&chassis_index);
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7139
7140 static void
7141 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
7142 const char *argv[] OVS_UNUSED, void *exiting_)
7143 {
7144 bool *exiting = exiting_;
7145 *exiting = true;
7146
7147 unixctl_command_reply(conn, NULL);
7148 }