]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
ovn: Add router load balancer undnat rule for IPv6
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
38 #include "packets.h"
39 #include "openvswitch/poll-loop.h"
40 #include "smap.h"
41 #include "sset.h"
42 #include "stream.h"
43 #include "stream-ssl.h"
44 #include "unixctl.h"
45 #include "util.h"
46 #include "uuid.h"
47 #include "openvswitch/vlog.h"
48
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* unixctl callback that asks the main loop to exit (defined below). */
static unixctl_cb_func ovn_northd_exit;

/* Connection state threaded through most of ovn-northd: the IDL handles
 * for both databases plus the currently open transaction on each (the
 * txn pointers are only valid while a transaction is in progress). */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* OVN Northbound DB session. */
    struct ovsdb_idl *ovnsb_idl;        /* OVN Southbound DB session. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Current northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Current southbound transaction. */
};

/* Database remotes and control socket path, set from the command line. */
static const char *ovnnb_db;
static const char *ovnsb_db;
static const char *unixctl_path;
/* OUI-style fixed prefix (0a:00:00:00:00:00) used for all MACs handed out
 * by the OVN ipam module; the low 24 bits are the variable suffix. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
/* Size of the 24-bit suffix space under MAC_ADDR_PREFIX. */
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);

/* Upper bound on VLAN tags ovn-northd will allocate (12-bit tag space). */
#define MAX_OVN_TAGS 4096
72 \f
/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};

/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Encoding: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
93
/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * PIPELINE_STAGES is an X-macro table; each PIPELINE_STAGE() entry lists
 * (datapath type, pipeline, stage id, table number, external name) and is
 * expanded both here (for the enum) and in the stage<->string helpers. */
enum ovn_stage {
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")     \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")         \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")          \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")    \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")             \
    PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,       7, "ls_in_qos_mark")        \
    PIPELINE_STAGE(SWITCH, IN,  QOS_METER,      8, "ls_in_qos_meter")       \
    PIPELINE_STAGE(SWITCH, IN,  LB,             9, "ls_in_lb")              \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,      10, "ls_in_stateful")        \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,    11, "ls_in_arp_rsp")         \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  12, "ls_in_dhcp_options")    \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 13, "ls_in_dhcp_response")   \
    PIPELINE_STAGE(SWITCH, IN,  DNS_LOOKUP,    14, "ls_in_dns_lookup")      \
    PIPELINE_STAGE(SWITCH, IN,  DNS_RESPONSE,  15, "ls_in_dns_response")    \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       16, "ls_in_l2_lkup")         \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")     \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, OUT, LB,           3, "ls_out_lb")            \
    PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")            \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK,     5, "ls_out_qos_mark")       \
    PIPELINE_STAGE(SWITCH, OUT, QOS_METER,    6, "ls_out_qos_meter")      \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     7, "ls_out_stateful")       \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  8, "ls_out_port_sec_ip")    \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  9, "ls_out_port_sec_l2")    \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,      0, "lr_in_admission")    \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,       1, "lr_in_ip_input")     \
    PIPELINE_STAGE(ROUTER, IN,  DEFRAG,         2, "lr_in_defrag")       \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,         3, "lr_in_unsnat")       \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,           4, "lr_in_dnat")         \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_OPTIONS,  5, "lr_in_nd_ra_options") \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,     7, "lr_in_ip_routing")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,    8, "lr_in_arp_resolve")  \
    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,    9, "lr_in_gw_redirect")  \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,   10, "lr_in_arp_request")  \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, UNDNAT,    0, "lr_out_undnat")        \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,      1, "lr_out_snat")          \
    PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP,  2, "lr_out_egr_loop")      \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,  3, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
159
/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* Register definitions specific to switches. */
#define REGBIT_CONNTRACK_DEFRAG  "reg0[0]"  /* Defrag via conntrack. */
#define REGBIT_CONNTRACK_COMMIT  "reg0[1]"  /* Commit connection to CT. */
#define REGBIT_CONNTRACK_NAT     "reg0[2]"  /* Apply CT NAT. */
#define REGBIT_DHCP_OPTS_RESULT  "reg0[3]"  /* DHCP options lookup result. */
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"  /* DNS lookup result. */
#define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"  /* IPv6 RA options result. */

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Indicate that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets with source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
182 /* Returns an "enum ovn_stage" built from the arguments. */
183 static enum ovn_stage
184 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
185 uint8_t table)
186 {
187 return OVN_STAGE_BUILD(dp_type, pipeline, table);
188 }
189
190 /* Returns the pipeline to which 'stage' belongs. */
191 static enum ovn_pipeline
192 ovn_stage_get_pipeline(enum ovn_stage stage)
193 {
194 return (stage >> 8) & 1;
195 }
196
197 /* Returns the pipeline name to which 'stage' belongs. */
198 static const char *
199 ovn_stage_get_pipeline_name(enum ovn_stage stage)
200 {
201 return ovn_stage_get_pipeline(stage) == P_IN ? "ingress" : "egress";
202 }
203
204 /* Returns the table to which 'stage' belongs. */
205 static uint8_t
206 ovn_stage_get_table(enum ovn_stage stage)
207 {
208 return stage & 0xff;
209 }
210
/* Returns a string name for 'stage', e.g. "ls_in_port_sec_l2".
 * Returns "<unknown>" for a value outside the PIPELINE_STAGES table. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
/* Expand the X-macro table into one "case" per stage. */
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
223
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added. */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
/* Expand the X-macro table into one "case" per stage. */
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
    /* Unlike ovn_stage_to_str(), an unknown stage is a programming error. */
    default: OVS_NOT_REACHED();
    }
}
237 \f
/* Prints a command-line usage summary for ovn-northd to stdout, followed
 * by the shared daemon, logging, and stream option help. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  --unixctl=SOCKET          override default control socket name\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
259 \f
/* One allocated tunnel ID, stored in a tunnel-ID set (an hmap hashed on
 * hash_int(tnlid, 0)).  Used for both datapath and port tunnel keys. */
struct tnlid_node {
    struct hmap_node hmap_node; /* In the owning tunnel-ID set. */
    uint32_t tnlid;             /* The allocated tunnel key. */
};
264
265 static void
266 destroy_tnlids(struct hmap *tnlids)
267 {
268 struct tnlid_node *node;
269 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
270 free(node);
271 }
272 hmap_destroy(tnlids);
273 }
274
275 static void
276 add_tnlid(struct hmap *set, uint32_t tnlid)
277 {
278 struct tnlid_node *node = xmalloc(sizeof *node);
279 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
280 node->tnlid = tnlid;
281 }
282
283 static bool
284 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
285 {
286 const struct tnlid_node *node;
287 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
288 if (node->tnlid == tnlid) {
289 return true;
290 }
291 }
292 return false;
293 }
294
/* Returns the tunnel ID to try after 'tnlid', wrapping from 'max' back to 1
 * (0 is reserved as the "allocation failed" sentinel). */
static uint32_t
next_tnlid(uint32_t tnlid, uint32_t max)
{
    uint32_t next = tnlid + 1;
    return next > max ? 1 : next;
}
300
301 static uint32_t
302 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
303 uint32_t *hint)
304 {
305 for (uint32_t tnlid = next_tnlid(*hint, max); tnlid != *hint;
306 tnlid = next_tnlid(tnlid, max)) {
307 if (!tnlid_in_use(set, tnlid)) {
308 add_tnlid(set, tnlid);
309 *hint = tnlid;
310 return tnlid;
311 }
312 }
313
314 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
315 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
316 return 0;
317 }
318 \f
/* One qdisc queue ID allocated on a particular chassis, stored in an hmap
 * keyed on uuid_hash(chassis_uuid). */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* In the owning set, hashed on chassis. */
    uint32_t queue_id;          /* Allocated queue ID. */
    struct uuid chassis_uuid;   /* Chassis on which 'queue_id' is in use. */
};
324
325 static void
326 destroy_chassis_queues(struct hmap *set)
327 {
328 struct ovn_chassis_qdisc_queues *node;
329 HMAP_FOR_EACH_POP (node, key_node, set) {
330 free(node);
331 }
332 hmap_destroy(set);
333 }
334
335 static void
336 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
337 uint32_t queue_id)
338 {
339 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
340 node->queue_id = queue_id;
341 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
342 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
343 }
344
345 static bool
346 chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
347 uint32_t queue_id)
348 {
349 const struct ovn_chassis_qdisc_queues *node;
350 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
351 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
352 && node->queue_id == queue_id) {
353 return true;
354 }
355 }
356 return false;
357 }
358
/* Allocates a free qdisc queue ID in (QDISC_MIN_QUEUE_ID,
 * QDISC_MAX_QUEUE_ID] for 'chassis', records it in 'set' and returns it.
 * Returns 0 when every queue ID on that chassis is taken. */
static uint32_t
allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
{
    for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
         queue_id <= QDISC_MAX_QUEUE_ID;
         queue_id++) {
        if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
            add_chassis_queue(set, &chassis->header_.uuid, queue_id);
            return queue_id;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
    return 0;
}
375
376 static void
377 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
378 uint32_t queue_id)
379 {
380 struct ovn_chassis_qdisc_queues *node;
381 HMAP_FOR_EACH_WITH_HASH (node, key_node,
382 uuid_hash(&chassis->header_.uuid),
383 set) {
384 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
385 && node->queue_id == queue_id) {
386 hmap_remove(set, &node->key_node);
387 break;
388 }
389 }
390 }
391
/* Returns true if 'opts' (a port's options column) carries either of the
 * QoS settings that require a qdisc queue. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    if (smap_get(opts, "qos_max_rate")) {
        return true;
    }
    return smap_get(opts, "qos_burst") != NULL;
}
398 \f
399
/* Per-logical-switch IPAM state, parsed from the switch's
 * other_config:subnet and other_config:ipv6_prefix. */
struct ipam_info {
    uint32_t start_ipv4;    /* First allocatable IPv4, host byte order. */
    size_t total_ipv4s;     /* Number of addresses tracked by the bitmap. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
    bool ipv6_prefix_set;   /* True if 'ipv6_prefix' was parsed OK. */
    struct in6_addr ipv6_prefix; /* Parsed other_config:ipv6_prefix. */
};
407
/* A logical switch or router, pairing a northbound row with its southbound
 * Datapath_Binding row.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* Ports whose peer is a router port. */
    size_t n_router_ports;          /* Number of elements in router_ports. */

    struct hmap port_tnlids;    /* Port tunnel keys in use (tnlid_node). */
    uint32_t port_key_hint;     /* Search hint for allocate_tnlid(). */

    bool has_unknown;           /* Any port with an "unknown" address? */

    /* IPAM data. */
    struct ipam_info ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
    struct ovn_port *localnet_port;
};
442
/* One entry in the global 'macam' table, recording a MAC in use. */
struct macam_node {
    struct hmap_node hmap_node; /* In 'macam', hashed on the 64-bit MAC. */
    struct eth_addr mac_addr; /* Allocated MAC address. */
};
447
448 static void
449 cleanup_macam(struct hmap *macam_)
450 {
451 struct macam_node *node;
452 HMAP_FOR_EACH_POP (node, hmap_node, macam_) {
453 free(node);
454 }
455 }
456
457 static struct ovn_datapath *
458 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
459 const struct nbrec_logical_switch *nbs,
460 const struct nbrec_logical_router *nbr,
461 const struct sbrec_datapath_binding *sb)
462 {
463 struct ovn_datapath *od = xzalloc(sizeof *od);
464 od->key = *key;
465 od->sb = sb;
466 od->nbs = nbs;
467 od->nbr = nbr;
468 hmap_init(&od->port_tnlids);
469 od->port_key_hint = 0;
470 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
471 return od;
472 }
473
474 static void
475 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
476 {
477 if (od) {
478 /* Don't remove od->list. It is used within build_datapaths() as a
479 * private list and once we've exited that function it is not safe to
480 * use it. */
481 hmap_remove(datapaths, &od->key_node);
482 destroy_tnlids(&od->port_tnlids);
483 bitmap_free(od->ipam_info.allocated_ipv4s);
484 free(od->router_ports);
485 free(od);
486 }
487 }
488
489 /* Returns 'od''s datapath type. */
490 static enum ovn_datapath_type
491 ovn_datapath_get_type(const struct ovn_datapath *od)
492 {
493 return od->nbs ? DP_SWITCH : DP_ROUTER;
494 }
495
496 static struct ovn_datapath *
497 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
498 {
499 struct ovn_datapath *od;
500
501 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
502 if (uuid_equals(uuid, &od->key)) {
503 return od;
504 }
505 }
506 return NULL;
507 }
508
509 static struct ovn_datapath *
510 ovn_datapath_from_sbrec(struct hmap *datapaths,
511 const struct sbrec_datapath_binding *sb)
512 {
513 struct uuid key;
514
515 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
516 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
517 return NULL;
518 }
519 return ovn_datapath_find(datapaths, &key);
520 }
521
522 static bool
523 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
524 {
525 return !lrouter->enabled || *lrouter->enabled;
526 }
527
/* Populates od->ipam_info from a logical switch's other_config columns:
 * "ipv6_prefix" (dynamic IPv6 addressing), "subnet" (dynamic IPv4
 * addressing), and "exclude_ips" (addresses IPAM must never hand out).
 * No-op for logical routers. */
static void
init_ipam_info_for_datapath(struct ovn_datapath *od)
{
    if (!od->nbs) {
        return;
    }

    const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
    const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");

    if (ipv6_prefix) {
        od->ipam_info.ipv6_prefix_set = ipv6_parse(
            ipv6_prefix, &od->ipam_info.ipv6_prefix);
    }

    if (!subnet_str) {
        return;
    }

    /* Reject unparsable subnets, /32 masks, and non-CIDR masks. */
    ovs_be32 subnet, mask;
    char *error = ip_parse_masked(subnet_str, &subnet, &mask);
    if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl
            = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
        free(error);
        return;
    }

    /* The first allocatable address is the one after the network address;
     * the bitmap covers every host suffix under the mask. */
    od->ipam_info.start_ipv4 = ntohl(subnet) + 1;
    od->ipam_info.total_ipv4s = ~ntohl(mask);
    od->ipam_info.allocated_ipv4s =
        bitmap_allocate(od->ipam_info.total_ipv4s);

    /* Mark first IP as taken */
    bitmap_set1(od->ipam_info.allocated_ipv4s, 0);

    /* Check if there are any reserved IPs (list) to be excluded from IPAM */
    const char *exclude_ip_list = smap_get(&od->nbs->other_config,
                                           "exclude_ips");
    if (!exclude_ip_list) {
        return;
    }

    struct lexer lexer;
    lexer_init(&lexer, exclude_ip_list);
    /* exclude_ip_list could be in the format -
     * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
     */
    lexer_get(&lexer);
    while (lexer.token.type != LEX_T_END) {
        if (lexer.token.type != LEX_T_INTEGER) {
            lexer_syntax_error(&lexer, "expecting address");
            break;
        }
        uint32_t start = ntohl(lexer.token.value.ipv4);
        lexer_get(&lexer);

        /* 'end' is exclusive; a single address is the range [start, start+1).
         * A ".." token extends the range to the second address. */
        uint32_t end = start + 1;
        if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
            if (lexer.token.type != LEX_T_INTEGER) {
                lexer_syntax_error(&lexer, "expecting address range");
                break;
            }
            end = ntohl(lexer.token.value.ipv4) + 1;
            lexer_get(&lexer);
        }

        /* Clamp start...end to fit the subnet. */
        start = MAX(od->ipam_info.start_ipv4, start);
        end = MIN(od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s, end);
        if (end > start) {
            bitmap_set_multiple(od->ipam_info.allocated_ipv4s,
                                start - od->ipam_info.start_ipv4,
                                end - start, 1);
        } else {
            lexer_error(&lexer, "excluded addresses not in subnet");
        }
    }
    if (lexer.error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
                     UUID_ARGS(&od->key), lexer.error);
    }
    lexer_destroy(&lexer);
}
614
/* Syncs od->sb's external-ids with the northbound row: the NB row's UUID
 * (under "logical-switch" or "logical-router", so the SB row can be traced
 * back to its origin), its name, and a Neutron-supplied "name2" if any. */
static void
ovn_datapath_update_external_ids(struct ovn_datapath *od)
{
    /* Get the logical-switch or logical-router UUID to set in
     * external-ids. */
    char uuid_s[UUID_LEN + 1];
    sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
    const char *key = od->nbs ? "logical-switch" : "logical-router";

    /* Get names to set in external-ids. */
    const char *name = od->nbs ? od->nbs->name : od->nbr->name;
    const char *name2 = (od->nbs
                         ? smap_get(&od->nbs->external_ids,
                                    "neutron:network_name")
                         : smap_get(&od->nbr->external_ids,
                                    "neutron:router_name"));

    /* Set external-ids. */
    struct smap ids = SMAP_INITIALIZER(&ids);
    smap_add(&ids, key, uuid_s);
    smap_add(&ids, "name", name);
    if (name2 && name2[0]) {
        smap_add(&ids, "name2", name2);
    }
    sbrec_datapath_binding_set_external_ids(od->sb, &ids);
    smap_destroy(&ids);
}
642
/* Pairs southbound Datapath_Binding rows with northbound logical
 * switches/routers, creating a "struct ovn_datapath" for each, and sorts
 * them into three lists:
 *
 *   - 'sb_only': SB rows whose NB counterpart no longer exists (stale).
 *   - 'nb_only': NB rows that have no SB row yet.
 *   - 'both':    rows matched on both sides.
 *
 * SB rows with missing or duplicate external-ids back-references are
 * deleted outright.  Also initializes each switch's IPAM info. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every valid SB row; tentatively file it as SB-only. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: match NB logical switches against the index; matched rows
     * move from 'sb_only' to 'both', unmatched ones go to 'nb_only'. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
            ovn_datapath_update_external_ids(od);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        init_ipam_info_for_datapath(od);
    }

    /* Pass 3: the same for NB logical routers (skipping disabled ones). */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
                ovn_datapath_update_external_ids(od);
            } else {
                /* Can't happen! */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
730
/* Allocates a new, unused 24-bit datapath tunnel key from 'dp_tnlids'.
 * Returns 0 when the key space is exhausted. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    /* Persists across calls so each search resumes after the last key. */
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
737
738 /* Updates the southbound Datapath_Binding table so that it contains the
739 * logical switches and routers specified by the northbound database.
740 *
741 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
742 * switch and router. */
743 static void
744 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
745 {
746 struct ovs_list sb_only, nb_only, both;
747
748 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
749
750 if (!ovs_list_is_empty(&nb_only)) {
751 /* First index the in-use datapath tunnel IDs. */
752 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
753 struct ovn_datapath *od;
754 LIST_FOR_EACH (od, list, &both) {
755 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
756 }
757
758 /* Add southbound record for each unmatched northbound record. */
759 LIST_FOR_EACH (od, list, &nb_only) {
760 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
761 if (!tunnel_key) {
762 break;
763 }
764
765 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
766 ovn_datapath_update_external_ids(od);
767 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
768 }
769 destroy_tnlids(&dp_tnlids);
770 }
771
772 /* Delete southbound records without northbound matches. */
773 struct ovn_datapath *od, *next;
774 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
775 ovs_list_remove(&od->list);
776 sbrec_datapath_binding_delete(od->sb);
777 ovn_datapath_destroy(datapaths, od);
778 }
779 }
780 \f
/* A logical switch port or logical router port, pairing a northbound row
 * with its southbound Port_Binding row. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;           /* Number of elements in lsp_addrs. */

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;            /* Number of elements in ps_addrs. */

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks; /* Router port MAC + networks. */

    bool derived;  /* Indicates whether this is an additional port
                    * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S as its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath that owns this port. */

    struct ovs_list list;       /* In list of similar records. */
};
817
818 static struct ovn_port *
819 ovn_port_create(struct hmap *ports, const char *key,
820 const struct nbrec_logical_switch_port *nbsp,
821 const struct nbrec_logical_router_port *nbrp,
822 const struct sbrec_port_binding *sb)
823 {
824 struct ovn_port *op = xzalloc(sizeof *op);
825
826 struct ds json_key = DS_EMPTY_INITIALIZER;
827 json_string_escape(key, &json_key);
828 op->json_key = ds_steal_cstr(&json_key);
829
830 op->key = xstrdup(key);
831 op->sb = sb;
832 op->nbsp = nbsp;
833 op->nbrp = nbrp;
834 op->derived = false;
835 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
836 return op;
837 }
838
839 static void
840 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
841 {
842 if (port) {
843 /* Don't remove port->list. It is used within build_ports() as a
844 * private list and once we've exited that function it is not safe to
845 * use it. */
846 hmap_remove(ports, &port->key_node);
847
848 for (int i = 0; i < port->n_lsp_addrs; i++) {
849 destroy_lport_addresses(&port->lsp_addrs[i]);
850 }
851 free(port->lsp_addrs);
852
853 for (int i = 0; i < port->n_ps_addrs; i++) {
854 destroy_lport_addresses(&port->ps_addrs[i]);
855 }
856 free(port->ps_addrs);
857
858 destroy_lport_addresses(&port->lrp_networks);
859 free(port->json_key);
860 free(port->key);
861 free(port);
862 }
863 }
864
865 static struct ovn_port *
866 ovn_port_find(struct hmap *ports, const char *name)
867 {
868 struct ovn_port *op;
869
870 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
871 if (!strcmp(op->key, name)) {
872 return op;
873 }
874 }
875 return NULL;
876 }
877
/* Allocates a new, unused 15-bit port tunnel key within datapath 'od',
 * resuming the search from od->port_key_hint.  Returns 0 on exhaustion. */
static uint32_t
ovn_port_allocate_key(struct ovn_datapath *od)
{
    return allocate_tnlid(&od->port_tnlids, "port",
                          (1u << 15) - 1, &od->port_key_hint);
}
884
/* Returns the name of the chassis-redirect port derived from 'port_name'
 * ("cr-" prefix).  The caller owns the returned string and must free it. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
890
891 static bool
892 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
893 {
894 struct macam_node *macam_node;
895 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
896 &macam) {
897 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
898 if (warn) {
899 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
900 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
901 ETH_ADDR_ARGS(macam_node->mac_addr));
902 }
903 return true;
904 }
905 }
906 return false;
907 }
908
909 static void
910 ipam_insert_mac(struct eth_addr *ea, bool check)
911 {
912 if (!ea) {
913 return;
914 }
915
916 uint64_t mac64 = eth_addr_to_uint64(*ea);
917 /* If the new MAC was not assigned by this address management system or
918 * check is true and the new MAC is a duplicate, do not insert it into the
919 * macam hmap. */
920 if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
921 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
922 return;
923 }
924
925 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
926 new_macam_node->mac_addr = *ea;
927 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
928 }
929
930 static void
931 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
932 {
933 if (!od || !od->ipam_info.allocated_ipv4s) {
934 return;
935 }
936
937 if (ip >= od->ipam_info.start_ipv4 &&
938 ip < (od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s)) {
939 bitmap_set1(od->ipam_info.allocated_ipv4s,
940 ip - od->ipam_info.start_ipv4);
941 }
942 }
943
944 static void
945 ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
946 char *address)
947 {
948 if (!od || !op || !address || !strcmp(address, "unknown")
949 || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
950 return;
951 }
952
953 struct lport_addresses laddrs;
954 if (!extract_lsp_addresses(address, &laddrs)) {
955 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
956 VLOG_WARN_RL(&rl, "Extract addresses failed.");
957 return;
958 }
959 ipam_insert_mac(&laddrs.ea, true);
960
961 /* IP is only added to IPAM if the switch's subnet option
962 * is set, whereas MAC is always added to MACAM. */
963 if (!od->ipam_info.allocated_ipv4s) {
964 destroy_lport_addresses(&laddrs);
965 return;
966 }
967
968 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
969 uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
970 ipam_insert_ip(od, ip);
971 }
972
973 destroy_lport_addresses(&laddrs);
974 }
975
/* Registers all of logical port 'op''s addresses with the global MACAM and
 * the relevant datapath's IPAM.  For a switch port the IPs count against
 * 'od' itself; for a router port they count against the peer switch's
 * datapath, and only when that switch has a "subnet" configured. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* A router port's IPs live in the peer switch's subnet, so they are
         * only inserted when the peer switch exists and runs IPAM. */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
1015
1016 static uint64_t
1017 ipam_get_unused_mac(void)
1018 {
1019 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1020 static uint32_t last_mac;
1021
1022 uint64_t mac64;
1023 struct eth_addr mac;
1024 uint32_t mac_addr_suffix, i;
1025 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1026 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1027 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
1028 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1029 eth_addr_from_uint64(mac64, &mac);
1030 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
1031 last_mac = mac_addr_suffix;
1032 break;
1033 }
1034 }
1035
1036 if (i == MAC_ADDR_SPACE) {
1037 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1038 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1039 mac64 = 0;
1040 }
1041
1042 return mac64;
1043 }
1044
1045 static uint32_t
1046 ipam_get_unused_ip(struct ovn_datapath *od)
1047 {
1048 if (!od || !od->ipam_info.allocated_ipv4s) {
1049 return 0;
1050 }
1051
1052 size_t new_ip_index = bitmap_scan(od->ipam_info.allocated_ipv4s, 0, 0,
1053 od->ipam_info.total_ipv4s - 1);
1054 if (new_ip_index == od->ipam_info.total_ipv4s - 1) {
1055 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1056 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
1057 return 0;
1058 }
1059
1060 return od->ipam_info.start_ipv4 + new_ip_index;
1061 }
1062
/* Allocates dynamic addresses for logical switch port 'op' according to
 * 'addrspec' (an entry from the port's addresses column) and writes the
 * result to the port's NB dynamic_addresses column as "MAC [IPv4] [IPv6]".
 * Returns true if at least one address family was dynamically assigned,
 * false otherwise (non-switch port, MAC space exhausted, or nothing to
 * allocate). */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec)
{
    if (!op->nbsp) {
        return false;
    }

    /* Get or generate MAC address.  An addrspec of the form "MAC dynamic"
     * pins the MAC and only the IP addresses are allocated dynamically;
     * otherwise a fresh MAC is taken from the MACAM pool. */
    struct eth_addr mac;
    bool dynamic_mac;
    int n = 0;
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        dynamic_mac = false;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        dynamic_mac = true;
    }

    /* Generate IPv4 address, if desirable.  ip4 may still be 0 here if the
     * subnet is exhausted. */
    bool dynamic_ip4 = od->ipam_info.allocated_ipv4s != NULL;
    uint32_t ip4 = dynamic_ip4 ? ipam_get_unused_ip(od) : 0;

    /* Generate IPv6 address, if desirable.  The address is derived from
     * the MAC within the configured prefix (EUI-64 style, per the helper's
     * name). */
    bool dynamic_ip6 = od->ipam_info.ipv6_prefix_set;
    struct in6_addr ip6;
    if (dynamic_ip6) {
        in6_generate_eui64(mac, &od->ipam_info.ipv6_prefix, &ip6);
    }

    /* If we didn't generate anything, bail out. */
    if (!dynamic_ip4 && !dynamic_ip6) {
        return false;
    }

    /* Save the dynamic addresses. */
    struct ds new_addr = DS_EMPTY_INITIALIZER;
    ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
    if (dynamic_ip4 && ip4) {
        ipam_insert_ip(od, ip4);
        ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(htonl(ip4)));
    }
    if (dynamic_ip6) {
        char ip6_s[INET6_ADDRSTRLEN + 1];
        ipv6_string_mapped(ip6_s, &ip6);
        ds_put_format(&new_addr, " %s", ip6_s);
    }
    /* NOTE(review): the second argument is true only for statically pinned
     * MACs — presumably it controls duplicate warnings, since a MAC from
     * ipam_get_unused_mac() is already known unique; confirm against
     * ipam_insert_mac()'s definition. */
    ipam_insert_mac(&mac, !dynamic_mac);
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                    ds_cstr(&new_addr));
    ds_destroy(&new_addr);
    return true;
}
1122
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Only logical switches with an IPv4 pool or an IPv6 prefix
         * participate in dynamic allocation. */
        if (!od->nbs || (!od->ipam_info.allocated_ipv4s &&
                         !od->ipam_info.ipv6_prefix_set)) {
            continue;
        }

        struct ovn_port *op;
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            const struct nbrec_logical_switch_port *nbsp =
                od->nbs->ports[i];

            if (!nbsp) {
                continue;
            }

            op = ovn_port_find(ports, nbsp->name);
            if (!op || (op->nbsp && op->peer)) {
                /* Do not allocate addresses for logical switch ports that
                 * have a peer. */
                continue;
            }

            for (size_t j = 0; j < nbsp->n_addresses; j++) {
                /* Allocate only for "dynamic" entries that do not yet have
                 * a recorded dynamic assignment. */
                if (is_dynamic_lsp_address(nbsp->addresses[j])
                    && !nbsp->dynamic_addresses) {
                    /* ipam_allocate_addresses() writes dynamic_addresses
                     * through the IDL, so on success the column can be
                     * re-read and parsed immediately below. */
                    if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
                        || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "Failed to allocate address.");
                    } else {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }

            if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
                /* The port no longer requests any addresses; drop the
                 * stale dynamic assignment. */
                nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                                NULL);
            }
        }
    }
}
1180 \f
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node;    /* In the tag allocation table, hashed on
                                    * 'parent_name'. */
    char *parent_name;             /* Owned copy of the parent port's name. */
    unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
};
1190
1191 static void
1192 tag_alloc_destroy(struct hmap *tag_alloc_table)
1193 {
1194 struct tag_alloc_node *node;
1195 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1196 bitmap_free(node->allocated_tags);
1197 free(node->parent_name);
1198 free(node);
1199 }
1200 hmap_destroy(tag_alloc_table);
1201 }
1202
1203 static struct tag_alloc_node *
1204 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1205 {
1206 /* If a node for the 'parent_name' exists, return it. */
1207 struct tag_alloc_node *tag_alloc_node;
1208 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1209 hash_string(parent_name, 0),
1210 tag_alloc_table) {
1211 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1212 return tag_alloc_node;
1213 }
1214 }
1215
1216 /* Create a new node. */
1217 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1218 tag_alloc_node->parent_name = xstrdup(parent_name);
1219 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1220 /* Tag 0 is invalid for nested containers. */
1221 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1222 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1223 hash_string(parent_name, 0));
1224
1225 return tag_alloc_node;
1226 }
1227
1228 static void
1229 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1230 const struct nbrec_logical_switch_port *nbsp)
1231 {
1232 /* Add the tags of already existing nested containers. If there is no
1233 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1234 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1235 return;
1236 }
1237
1238 struct tag_alloc_node *tag_alloc_node;
1239 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1240 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1241 }
1242
1243 static void
1244 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1245 const struct nbrec_logical_switch_port *nbsp)
1246 {
1247 if (!nbsp->tag_request) {
1248 return;
1249 }
1250
1251 if (nbsp->parent_name && nbsp->parent_name[0]
1252 && *nbsp->tag_request == 0) {
1253 /* For nested containers that need allocation, do the allocation. */
1254
1255 if (nbsp->tag) {
1256 /* This has already been allocated. */
1257 return;
1258 }
1259
1260 struct tag_alloc_node *tag_alloc_node;
1261 int64_t tag;
1262 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1263 nbsp->parent_name);
1264 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1265 if (tag == MAX_OVN_TAGS) {
1266 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1267 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1268 "parent %s", nbsp->parent_name);
1269 return;
1270 }
1271 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1272 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1273 } else if (*nbsp->tag_request != 0) {
1274 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1275 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1276 }
1277 }
1278 \f
1279
1280 /*
1281 * This function checks if the MAC in "address" parameter (if present) is
1282 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1283 * and updates it.
1284 */
1285 static void
1286 check_and_update_mac_in_dynamic_addresses(
1287 const char *address,
1288 const struct nbrec_logical_switch_port *nbsp)
1289 {
1290 if (!nbsp->dynamic_addresses) {
1291 return;
1292 }
1293 int buf_index = 0;
1294 struct eth_addr ea;
1295 if (!ovs_scan_len(address, &buf_index,
1296 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1297 return;
1298 }
1299
1300 struct eth_addr present_ea;
1301 buf_index = 0;
1302 if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index,
1303 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea))
1304 && !eth_addr_equals(ea, present_ea)) {
1305 /* MAC address has changed. Update it */
1306 char *new_addr = xasprintf(
1307 ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea),
1308 &nbsp->dynamic_addresses[buf_index]);
1309 nbrec_logical_switch_port_set_dynamic_addresses(
1310 nbsp, new_addr);
1311 free(new_addr);
1312 }
1313 }
1314
/* Builds the 'ports' hmap from NB logical switch/router ports and SB
 * Port_Binding rows, classifying every port onto exactly one of three
 * lists: 'sb_only' (SB bindings with no matching NB port), 'nb_only'
 * (NB ports not yet bound in the SB), and 'both'.  Along the way it
 * restores chassis qdisc queue reservations, seeds IPAM/MACAM, records
 * existing container tags, creates "derived" chassisredirect ports for
 * gateway router ports, and cross-links router ports with their
 * switch-side peers. */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start by assuming every SB binding is stale; matches found below
     * move the port from 'sb_only' to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: join its ports against the SB bindings. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Re-register the qdisc queue already claimed by this
                     * binding so it is not handed out again. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                if (!strcmp(nbsp->type, "localnet")) {
                    od->localnet_port = op;
                }

                /* Parse the addresses column.  "unknown"/"router" entries
                 * are skipped ("router" is filled in by the peer-linking
                 * loop at the bottom); "dynamic" entries are parsed from
                 * dynamic_addresses instead. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")
                        || !strcmp(nbsp->addresses[j], "router")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            check_and_update_mac_in_dynamic_addresses(
                                nbsp->addresses[j], nbsp);
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            /* Allocation happens later, in build_ipam(). */
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the port_security column. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: join its ports similarly. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A router port without any configured network is
                 * ignored entirely. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);

                const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                        "redirect-chassis");
                if (redirect_chassis || op->nbrp->n_gateway_chassis) {
                    /* Additional "derived" ovn_port crp represents the
                     * instance of op on the "redirect-chassis". */
                    const char *gw_chassis = smap_get(&op->od->nbr->options,
                                                   "chassis");
                    if (gw_chassis) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: "
                                     "redirect-chassis configured on port %s "
                                     "on L3 gateway router", nbrp->name);
                        continue;
                    }
                    if (od->l3dgw_port || od->l3redirect_port) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
                                     "with redirect-chassis on same logical "
                                     "router %s", od->nbr->name);
                        continue;
                    }

                    char *redirect_name = chassis_redirect_name(nbrp->name);
                    struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                    if (crp) {
                        crp->derived = true;
                        crp->nbrp = nbrp;
                        ovs_list_remove(&crp->list);
                        ovs_list_push_back(both, &crp->list);
                    } else {
                        crp = ovn_port_create(ports, redirect_name,
                                              NULL, nbrp, NULL);
                        crp->derived = true;
                        ovs_list_push_back(nb_only, &crp->list);
                    }
                    crp->od = od;
                    free(redirect_name);

                    /* Set l3dgw_port and l3redirect_port in od, for later
                     * use during flow creation. */
                    od->l3dgw_port = op;
                    od->l3redirect_port = crp;
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;

            /* Fill op->lsp_addrs for op->nbsp->addresses[] with
             * contents "router", which was skipped in the loop above. */
            for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
                if (!strcmp(op->nbsp->addresses[j], "router")) {
                    if (extract_lrp_networks(peer->nbrp,
                                             &op->lsp_addrs[op->n_lsp_addrs])) {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }
        } else if (op->nbrp && op->nbrp->peer && !op->derived) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1576
1577 static void
1578 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1579 uint16_t *port, int *addr_family);
1580
1581 static void
1582 get_router_load_balancer_ips(const struct ovn_datapath *od,
1583 struct sset *all_ips, int *addr_family)
1584 {
1585 if (!od->nbr) {
1586 return;
1587 }
1588
1589 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1590 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1591 struct smap *vips = &lb->vips;
1592 struct smap_node *node;
1593
1594 SMAP_FOR_EACH (node, vips) {
1595 /* node->key contains IP:port or just IP. */
1596 char *ip_address = NULL;
1597 uint16_t port;
1598
1599 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
1600 addr_family);
1601 if (!ip_address) {
1602 continue;
1603 }
1604
1605 if (!sset_contains(all_ips, ip_address)) {
1606 sset_add(all_ips, ip_address);
1607 }
1608
1609 free(ip_address);
1610 }
1611 }
1612 }
1613
1614 /* Returns an array of strings, each consisting of a MAC address followed
1615 * by one or more IP addresses, and if the port is a distributed gateway
1616 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1617 * LPORT_NAME is the name of the L3 redirect port or the name of the
1618 * logical_port specified in a NAT rule. These strings include the
1619 * external IP addresses of all NAT rules defined on that router, and all
1620 * of the IP addresses used in load balancer VIPs defined on that router.
1621 *
1622 * The caller must free each of the n returned strings with free(),
1623 * and must free the returned array when it is no longer needed. */
1624 static char **
1625 get_nat_addresses(const struct ovn_port *op, size_t *n)
1626 {
1627 size_t n_nats = 0;
1628 struct eth_addr mac;
1629 if (!op->nbrp || !op->od || !op->od->nbr
1630 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1631 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
1632 *n = n_nats;
1633 return NULL;
1634 }
1635
1636 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1637 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1638 bool central_ip_address = false;
1639
1640 char **addresses;
1641 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
1642
1643 /* Get NAT IP addresses. */
1644 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
1645 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1646 ovs_be32 ip, mask;
1647
1648 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1649 if (error || mask != OVS_BE32_MAX) {
1650 free(error);
1651 continue;
1652 }
1653
1654 /* Determine whether this NAT rule satisfies the conditions for
1655 * distributed NAT processing. */
1656 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1657 && nat->logical_port && nat->external_mac) {
1658 /* Distributed NAT rule. */
1659 if (eth_addr_from_string(nat->external_mac, &mac)) {
1660 struct ds address = DS_EMPTY_INITIALIZER;
1661 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1662 ds_put_format(&address, " %s", nat->external_ip);
1663 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1664 nat->logical_port);
1665 addresses[n_nats++] = ds_steal_cstr(&address);
1666 }
1667 } else {
1668 /* Centralized NAT rule, either on gateway router or distributed
1669 * router. */
1670 ds_put_format(&c_addresses, " %s", nat->external_ip);
1671 central_ip_address = true;
1672 }
1673 }
1674
1675 /* A set to hold all load-balancer vips. */
1676 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1677 int addr_family;
1678 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
1679
1680 const char *ip_address;
1681 SSET_FOR_EACH (ip_address, &all_ips) {
1682 ds_put_format(&c_addresses, " %s", ip_address);
1683 central_ip_address = true;
1684 }
1685 sset_destroy(&all_ips);
1686
1687 if (central_ip_address) {
1688 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1689 * ports should be restricted to the "redirect-chassis". */
1690 if (op->od->l3redirect_port) {
1691 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1692 op->od->l3redirect_port->json_key);
1693 }
1694
1695 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
1696 }
1697
1698 *n = n_nats;
1699
1700 return addresses;
1701 }
1702
1703 static bool
1704 gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc,
1705 const struct sbrec_chassis *nb_gwc_c,
1706 const struct sbrec_gateway_chassis *sb_gwc)
1707 {
1708 bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
1709 && nb_gwc->priority == sb_gwc->priority
1710 && smap_equal(&nb_gwc->options, &sb_gwc->options)
1711 && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
1712
1713 if (!equal) {
1714 return false;
1715 }
1716
1717 /* If everything else matched and we were unable to find the SBDB
1718 * Chassis entry at this time, assume a match and return true.
1719 * This happens when an ovn-controller is restarting and the Chassis
1720 * entry is gone away momentarily */
1721 return !nb_gwc_c
1722 || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
1723 sb_gwc->chassis->name));
1724 }
1725
1726 static bool
1727 sbpb_gw_chassis_needs_update(
1728 struct ovsdb_idl_index *sbrec_chassis_by_name,
1729 const struct sbrec_port_binding *port_binding,
1730 const struct nbrec_logical_router_port *lrp)
1731 {
1732 if (!lrp || !port_binding) {
1733 return false;
1734 }
1735
1736 /* These arrays are used to collect valid Gateway_Chassis and valid
1737 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1738 * we ignore the ones we can't match on the SBDB */
1739 struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis *
1740 sizeof *lrp_gwc);
1741 const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis *
1742 sizeof *lrp_gwc_c);
1743
1744 /* Count the number of gateway chassis chassis names from the logical
1745 * router port that we are able to match on the southbound database */
1746 int lrp_n_gateway_chassis = 0;
1747 int n;
1748 for (n = 0; n < lrp->n_gateway_chassis; n++) {
1749
1750 if (!lrp->gateway_chassis[n]->chassis_name) {
1751 continue;
1752 }
1753
1754 const struct sbrec_chassis *chassis =
1755 chassis_lookup_by_name(sbrec_chassis_by_name,
1756 lrp->gateway_chassis[n]->chassis_name);
1757
1758 lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
1759 lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
1760 lrp_n_gateway_chassis++;
1761 if (!chassis) {
1762 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1763 VLOG_WARN_RL(
1764 &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1765 "on logical router port %s does not exist in SBDB",
1766 lrp->gateway_chassis[n]->chassis_name, lrp->name);
1767 }
1768 }
1769
1770 /* Basic check, different amount of Gateway_Chassis means that we
1771 * need to update southbound database Port_Binding */
1772 if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) {
1773 free(lrp_gwc_c);
1774 free(lrp_gwc);
1775 return true;
1776 }
1777
1778 for (n = 0; n < lrp_n_gateway_chassis; n++) {
1779 int i;
1780 /* For each of the valid gw chassis on the lrp, check if there's
1781 * a match on the Port_Binding list, we assume order is not
1782 * persisted */
1783 for (i = 0; i < port_binding->n_gateway_chassis; i++) {
1784 if (gateway_chassis_equal(lrp_gwc[n],
1785 lrp_gwc_c[n],
1786 port_binding->gateway_chassis[i])) {
1787 break; /* we found a match */
1788 }
1789 }
1790
1791 /* if no Port_Binding gateway chassis matched for the entry... */
1792 if (i == port_binding->n_gateway_chassis) {
1793 free(lrp_gwc_c);
1794 free(lrp_gwc);
1795 return true; /* found no match for this gateway chassis on lrp */
1796 }
1797 }
1798
1799 /* no need for update, all ports matched */
1800 free(lrp_gwc_c);
1801 free(lrp_gwc);
1802 return false;
1803 }
1804
/* This functions translates the gw chassis on the nb database
 * to sb database entries, the only difference is that SB database
 * Gateway_Chassis table references the chassis directly instead
 * of using the name */
static void
copy_gw_chassis_from_nbrp_to_sbpb(
        struct northd_context *ctx,
        struct ovsdb_idl_index *sbrec_chassis_by_name,
        const struct nbrec_logical_router_port *lrp,
        const struct sbrec_port_binding *port_binding) {

    if (!lrp || !port_binding || !lrp->n_gateway_chassis) {
        return;
    }

    struct sbrec_gateway_chassis **gw_chassis = NULL;
    int n_gwc = 0;
    int n;

    /* XXX: This can be improved. This code will generate a set of new
     * Gateway_Chassis and push them all in a single transaction, instead
     * this would be more optimal if we just add/update/remove the rows in
     * the southbound db that need to change. We don't expect lots of
     * changes to the Gateway_Chassis table, but if that proves to be wrong
     * we should optimize this. */
    for (n = 0; n < lrp->n_gateway_chassis; n++) {
        struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n];
        /* Entries without a chassis name cannot be resolved; skip them. */
        if (!lrp_gwc->chassis_name) {
            continue;
        }

        /* May be NULL if the chassis is not (or not yet) present in the
         * SBDB; the SB reference is then simply left unset. */
        const struct sbrec_chassis *chassis =
            chassis_lookup_by_name(sbrec_chassis_by_name,
                                   lrp_gwc->chassis_name);

        gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis);

        /* Create a fresh SB Gateway_Chassis row mirroring the NB one. */
        struct sbrec_gateway_chassis *pb_gwc =
            sbrec_gateway_chassis_insert(ctx->ovnsb_txn);

        sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name);
        sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority);
        sbrec_gateway_chassis_set_chassis(pb_gwc, chassis);
        sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options);
        sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids);

        gw_chassis[n_gwc++] = pb_gwc;
    }
    sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc);
    free(gw_chassis);
}
1856
1857 static void
1858 ovn_port_update_sbrec(struct northd_context *ctx,
1859 struct ovsdb_idl_index *sbrec_chassis_by_name,
1860 const struct ovn_port *op,
1861 struct hmap *chassis_qdisc_queues)
1862 {
1863 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
1864 if (op->nbrp) {
1865 /* If the router is for l3 gateway, it resides on a chassis
1866 * and its port type is "l3gateway". */
1867 const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");
1868 if (op->derived) {
1869 sbrec_port_binding_set_type(op->sb, "chassisredirect");
1870 } else if (chassis_name) {
1871 sbrec_port_binding_set_type(op->sb, "l3gateway");
1872 } else {
1873 sbrec_port_binding_set_type(op->sb, "patch");
1874 }
1875
1876 struct smap new;
1877 smap_init(&new);
1878 if (op->derived) {
1879 const char *redirect_chassis = smap_get(&op->nbrp->options,
1880 "redirect-chassis");
1881 if (op->nbrp->n_gateway_chassis && redirect_chassis) {
1882 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1883 VLOG_WARN_RL(
1884 &rl, "logical router port %s has both options:"
1885 "redirect-chassis and gateway_chassis populated "
1886 "redirect-chassis will be ignored in favour of "
1887 "gateway chassis", op->nbrp->name);
1888 }
1889
1890 if (op->nbrp->n_gateway_chassis) {
1891 if (sbpb_gw_chassis_needs_update(sbrec_chassis_by_name,
1892 op->sb, op->nbrp)) {
1893 copy_gw_chassis_from_nbrp_to_sbpb(ctx,
1894 sbrec_chassis_by_name,
1895 op->nbrp, op->sb);
1896 }
1897
1898 } else if (redirect_chassis) {
1899 /* Handle ports that had redirect-chassis option attached
1900 * to them, and for backwards compatibility convert them
1901 * to a single Gateway_Chassis entry */
1902 const struct sbrec_chassis *chassis =
1903 chassis_lookup_by_name(sbrec_chassis_by_name,
1904 redirect_chassis);
1905 if (chassis) {
1906 /* If we found the chassis, and the gw chassis on record
1907 * differs from what we expect go ahead and update */
1908 if (op->sb->n_gateway_chassis != 1
1909 || !op->sb->gateway_chassis[0]->chassis
1910 || strcmp(op->sb->gateway_chassis[0]->chassis->name,
1911 chassis->name)
1912 || op->sb->gateway_chassis[0]->priority != 0) {
1913 /* Construct a single Gateway_Chassis entry on the
1914 * Port_Binding attached to the redirect_chassis
1915 * name */
1916 struct sbrec_gateway_chassis *gw_chassis =
1917 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
1918
1919 char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
1920 chassis->name);
1921
1922 /* XXX: Again, here, we could just update an existing
1923 * Gateway_Chassis, instead of creating a new one
1924 * and replacing it */
1925 sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
1926 sbrec_gateway_chassis_set_priority(gw_chassis, 0);
1927 sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
1928 sbrec_gateway_chassis_set_external_ids(gw_chassis,
1929 &op->nbrp->external_ids);
1930 sbrec_port_binding_set_gateway_chassis(op->sb,
1931 &gw_chassis, 1);
1932 free(gwc_name);
1933 }
1934 } else {
1935 VLOG_WARN("chassis name '%s' from redirect from logical "
1936 " router port '%s' redirect-chassis not found",
1937 redirect_chassis, op->nbrp->name);
1938 if (op->sb->n_gateway_chassis) {
1939 sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
1940 0);
1941 }
1942 }
1943 }
1944 smap_add(&new, "distributed-port", op->nbrp->name);
1945 } else {
1946 if (op->peer) {
1947 smap_add(&new, "peer", op->peer->key);
1948 }
1949 if (chassis_name) {
1950 smap_add(&new, "l3gateway-chassis", chassis_name);
1951 }
1952 }
1953 sbrec_port_binding_set_options(op->sb, &new);
1954 smap_destroy(&new);
1955
1956 sbrec_port_binding_set_parent_port(op->sb, NULL);
1957 sbrec_port_binding_set_tag(op->sb, NULL, 0);
1958
1959 struct ds s = DS_EMPTY_INITIALIZER;
1960 ds_put_cstr(&s, op->nbrp->mac);
1961 for (int i = 0; i < op->nbrp->n_networks; ++i) {
1962 ds_put_format(&s, " %s", op->nbrp->networks[i]);
1963 }
1964 const char *addresses = ds_cstr(&s);
1965 sbrec_port_binding_set_mac(op->sb, &addresses, 1);
1966 ds_destroy(&s);
1967
1968 struct smap ids = SMAP_INITIALIZER(&ids);
1969 sbrec_port_binding_set_external_ids(op->sb, &ids);
1970 } else {
1971 if (strcmp(op->nbsp->type, "router")) {
1972 uint32_t queue_id = smap_get_int(
1973 &op->sb->options, "qdisc_queue_id", 0);
1974 bool has_qos = port_has_qos_params(&op->nbsp->options);
1975 struct smap options;
1976
1977 if (op->sb->chassis && has_qos && !queue_id) {
1978 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
1979 op->sb->chassis);
1980 } else if (!has_qos && queue_id) {
1981 free_chassis_queueid(chassis_qdisc_queues,
1982 op->sb->chassis,
1983 queue_id);
1984 queue_id = 0;
1985 }
1986
1987 smap_clone(&options, &op->nbsp->options);
1988 if (queue_id) {
1989 smap_add_format(&options,
1990 "qdisc_queue_id", "%d", queue_id);
1991 }
1992 sbrec_port_binding_set_options(op->sb, &options);
1993 smap_destroy(&options);
1994 if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
1995 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
1996 } else {
1997 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1998 VLOG_WARN_RL(
1999 &rl, "Unknown port type '%s' set on logical switch '%s'.",
2000 op->nbsp->type, op->nbsp->name);
2001 }
2002 } else {
2003 const char *chassis = NULL;
2004 if (op->peer && op->peer->od && op->peer->od->nbr) {
2005 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
2006 }
2007
2008 /* A switch port connected to a gateway router is also of
2009 * type "l3gateway". */
2010 if (chassis) {
2011 sbrec_port_binding_set_type(op->sb, "l3gateway");
2012 } else {
2013 sbrec_port_binding_set_type(op->sb, "patch");
2014 }
2015
2016 const char *router_port = smap_get(&op->nbsp->options,
2017 "router-port");
2018 if (router_port || chassis) {
2019 struct smap new;
2020 smap_init(&new);
2021 if (router_port) {
2022 smap_add(&new, "peer", router_port);
2023 }
2024 if (chassis) {
2025 smap_add(&new, "l3gateway-chassis", chassis);
2026 }
2027 sbrec_port_binding_set_options(op->sb, &new);
2028 smap_destroy(&new);
2029 }
2030
2031 const char *nat_addresses = smap_get(&op->nbsp->options,
2032 "nat-addresses");
2033 if (nat_addresses && !strcmp(nat_addresses, "router")) {
2034 if (op->peer && op->peer->od
2035 && (chassis || op->peer->od->l3redirect_port)) {
2036 size_t n_nats;
2037 char **nats = get_nat_addresses(op->peer, &n_nats);
2038 if (n_nats) {
2039 sbrec_port_binding_set_nat_addresses(op->sb,
2040 (const char **) nats, n_nats);
2041 for (size_t i = 0; i < n_nats; i++) {
2042 free(nats[i]);
2043 }
2044 free(nats);
2045 } else {
2046 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2047 }
2048 } else {
2049 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2050 }
2051 /* Only accept manual specification of ethernet address
2052 * followed by IPv4 addresses on type "l3gateway" ports. */
2053 } else if (nat_addresses && chassis) {
2054 struct lport_addresses laddrs;
2055 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
2056 static struct vlog_rate_limit rl =
2057 VLOG_RATE_LIMIT_INIT(1, 1);
2058 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
2059 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2060 } else {
2061 sbrec_port_binding_set_nat_addresses(op->sb,
2062 &nat_addresses, 1);
2063 destroy_lport_addresses(&laddrs);
2064 }
2065 } else {
2066 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2067 }
2068 }
2069 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
2070 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
2071 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
2072 op->nbsp->n_addresses);
2073
2074 struct smap ids = SMAP_INITIALIZER(&ids);
2075 smap_clone(&ids, &op->nbsp->external_ids);
2076 const char *name = smap_get(&ids, "neutron:port_name");
2077 if (name && name[0]) {
2078 smap_add(&ids, "name", name);
2079 }
2080 sbrec_port_binding_set_external_ids(op->sb, &ids);
2081 smap_destroy(&ids);
2082 }
2083 }
2084
2085 /* Remove mac_binding entries that refer to logical_ports which are
2086 * deleted. */
2087 static void
2088 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
2089 {
2090 const struct sbrec_mac_binding *b, *n;
2091 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
2092 if (!ovn_port_find(ports, b->logical_port)) {
2093 sbrec_mac_binding_delete(b);
2094 }
2095 }
2096 }
2097
/* Updates the southbound Port_Binding table so that it contains the logical
 * switch ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx,
            struct ovsdb_idl_index *sbrec_chassis_by_name,
            struct hmap *datapaths, struct hmap *ports)
{
    struct ovs_list sb_only, nb_only, both;
    struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
    struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);

    /* Partition all logical ports into: present only in SB, only in NB,
     * or in both databases. */
    join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
                       &tag_alloc_table, &sb_only, &nb_only, &both);

    struct ovn_port *op, *next;
    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data. Also index the in-use tunnel_keys.
     * For logical ports that are in NB database, do any tag allocation
     * needed. */
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        if (op->nbsp) {
            tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
        }
        ovn_port_update_sbrec(ctx, sbrec_chassis_by_name,
                              op, &chassis_qdisc_queues);

        /* Record this port's tunnel key as in-use so newly added ports
         * do not collide with it. */
        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record. */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            /* Key space for this datapath is exhausted; skip the port. */
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(ctx, sbrec_chassis_by_name, op,
                              &chassis_qdisc_queues);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* If any SB-only (i.e. stale) port bindings exist, their MAC bindings
     * must be cleaned up after the deletions below. */
    bool remove_mac_bindings = false;
    if (!ovs_list_is_empty(&sb_only)) {
        remove_mac_bindings = true;
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        ovs_list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
    if (remove_mac_bindings) {
        cleanup_mac_bindings(ctx, ports);
    }

    tag_alloc_destroy(&tag_alloc_table);
    destroy_chassis_queues(&chassis_qdisc_queues);
}
2167 \f
/* Multicast group tunnel keys occupy the upper half of the 16-bit key
 * space; unicast port keys use the lower half. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with its fixed southbound tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Flood group: all enabled ports on a datapath. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Unknown-MAC group: ports with "unknown" in their addresses column. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
2181
2182 static bool
2183 multicast_group_equal(const struct multicast_group *a,
2184 const struct multicast_group *b)
2185 {
2186 return !strcmp(a->name, b->name) && a->key == b->key;
2187 }
2188
/* Multicast group entry: the set of ports on one datapath that belong to
 * one multicast group. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Dynamically grown array of member ports. */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
2198
2199 static uint32_t
2200 ovn_multicast_hash(const struct ovn_datapath *datapath,
2201 const struct multicast_group *group)
2202 {
2203 return hash_pointer(datapath, group->key);
2204 }
2205
2206 static struct ovn_multicast *
2207 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
2208 const struct multicast_group *group)
2209 {
2210 struct ovn_multicast *mc;
2211
2212 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
2213 ovn_multicast_hash(datapath, group), mcgroups) {
2214 if (mc->datapath == datapath
2215 && multicast_group_equal(mc->group, group)) {
2216 return mc;
2217 }
2218 }
2219 return NULL;
2220 }
2221
2222 static void
2223 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
2224 struct ovn_port *port)
2225 {
2226 struct ovn_datapath *od = port->od;
2227 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
2228 if (!mc) {
2229 mc = xmalloc(sizeof *mc);
2230 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
2231 mc->datapath = od;
2232 mc->group = group;
2233 mc->n_ports = 0;
2234 mc->allocated_ports = 4;
2235 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
2236 }
2237 if (mc->n_ports >= mc->allocated_ports) {
2238 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
2239 sizeof *mc->ports);
2240 }
2241 mc->ports[mc->n_ports++] = port;
2242 }
2243
2244 static void
2245 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
2246 {
2247 if (mc) {
2248 hmap_remove(mcgroups, &mc->hmap_node);
2249 free(mc->ports);
2250 free(mc);
2251 }
2252 }
2253
2254 static void
2255 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
2256 const struct sbrec_multicast_group *sb)
2257 {
2258 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
2259 for (size_t i = 0; i < mc->n_ports; i++) {
2260 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2261 }
2262 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2263 free(ports);
2264 }
2265 \f
2266 /* Logical flow generation.
2267 *
2268 * This code generates the Logical_Flow table in the southbound database, as a
2269 * function of most of the northbound database.
2270 */
2271
/* One logical flow, keyed by (datapath, stage, priority, match, actions). */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;
    enum ovn_stage stage;
    uint16_t priority;
    char *match;                /* Owned match expression string. */
    char *actions;              /* Owned actions string. */
    char *stage_hint;           /* Optional external-ids hint; may be NULL. */
    const char *where;          /* Source location that added the flow. */
};
2283
2284 static size_t
2285 ovn_lflow_hash(const struct ovn_lflow *lflow)
2286 {
2287 return ovn_logical_flow_hash(&lflow->od->sb->header_.uuid,
2288 ovn_stage_get_table(lflow->stage),
2289 ovn_stage_get_pipeline_name(lflow->stage),
2290 lflow->priority, lflow->match,
2291 lflow->actions);
2292 }
2293
2294 static bool
2295 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2296 {
2297 return (a->od == b->od
2298 && a->stage == b->stage
2299 && a->priority == b->priority
2300 && !strcmp(a->match, b->match)
2301 && !strcmp(a->actions, b->actions));
2302 }
2303
2304 static void
2305 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
2306 enum ovn_stage stage, uint16_t priority,
2307 char *match, char *actions, char *stage_hint,
2308 const char *where)
2309 {
2310 lflow->od = od;
2311 lflow->stage = stage;
2312 lflow->priority = priority;
2313 lflow->match = match;
2314 lflow->actions = actions;
2315 lflow->stage_hint = stage_hint;
2316 lflow->where = where;
2317 }
2318
2319 /* Adds a row with the specified contents to the Logical_Flow table. */
2320 static void
2321 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2322 enum ovn_stage stage, uint16_t priority,
2323 const char *match, const char *actions,
2324 const char *stage_hint, const char *where)
2325 {
2326 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2327
2328 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
2329 ovn_lflow_init(lflow, od, stage, priority,
2330 xstrdup(match), xstrdup(actions),
2331 nullable_xstrdup(stage_hint), where);
2332 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2333 }
2334
/* Adds a row with the specified contents to the Logical_Flow table,
 * recording the caller's source location and an optional stage hint. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

/* Same as above, without a stage hint. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                            ACTIONS, NULL)
2344
2345 static struct ovn_lflow *
2346 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
2347 enum ovn_stage stage, uint16_t priority,
2348 const char *match, const char *actions, uint32_t hash)
2349 {
2350 struct ovn_lflow target;
2351 ovn_lflow_init(&target, od, stage, priority,
2352 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2353 NULL, NULL);
2354
2355 struct ovn_lflow *lflow;
2356 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, hash, lflows) {
2357 if (ovn_lflow_equal(lflow, &target)) {
2358 return lflow;
2359 }
2360 }
2361 return NULL;
2362 }
2363
2364 static void
2365 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2366 {
2367 if (lflow) {
2368 hmap_remove(lflows, &lflow->hmap_node);
2369 free(lflow->match);
2370 free(lflow->actions);
2371 free(lflow->stage_hint);
2372 free(lflow);
2373 }
2374 }
2375
2376 /* Appends port security constraints on L2 address field 'eth_addr_field'
2377 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2378 * elements, is the collection of port_security constraints from an
2379 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2380 static void
2381 build_port_security_l2(const char *eth_addr_field,
2382 struct lport_addresses *ps_addrs,
2383 unsigned int n_ps_addrs,
2384 struct ds *match)
2385 {
2386 if (!n_ps_addrs) {
2387 return;
2388 }
2389
2390 ds_put_format(match, " && %s == {", eth_addr_field);
2391
2392 for (size_t i = 0; i < n_ps_addrs; i++) {
2393 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
2394 }
2395 ds_chomp(match, ' ');
2396 ds_put_cstr(match, "}");
2397 }
2398
/* Appends an IPv6 ND port-security clause to 'match': the ND link-layer
 * options (nd.sll/nd.tll) may only carry all-zeros or the port's own MAC
 * 'ea', and, when 'n_ipv6_addrs' > 0, nd.target is restricted to the
 * port's EUI-64 link-local address plus the given addresses.
 * NOTE(review): the parentheses are balanced across both exit paths —
 * "))" when no addresses, ")))" otherwise; keep them in sync if edited. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No address constraint: just close the open groups. */
        ds_put_cstr(match, "))");
        return;
    }

    /* Always allow the link-local address derived from the MAC. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
2429
/* Appends an IPv6 L3 port-security clause to 'match': restricts ip6.src
 * (ingress) or ip6.dst (egress) to the MAC-derived link-local address,
 * the given 'ipv6_addrs', and — on egress only — the ff00::/8 multicast
 * range. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
2459
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only port security: allow ARP and ND for this MAC without
         * IP constraints. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Replace the trailing ", " with "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port not matched above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
2539
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has IPv4 addresses,
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *
 *   - If the port security has IPv6 addresses,
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both
 *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* Select the match field and flow stage for the pipeline direction. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only entries get no L3 constraints at all. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress additionally permits broadcast and multicast
                 * destinations. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address.  If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Drop any other IP traffic from/to this MAC. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }

}
2666
2667 static bool
2668 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2669 {
2670 return !lsp->enabled || *lsp->enabled;
2671 }
2672
2673 static bool
2674 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2675 {
2676 return !lsp->up || *lsp->up;
2677 }
2678
/* Builds the native-DHCPv4 logical actions for offering 'offer_ip' on
 * logical switch port 'op'.
 *
 * On success, returns true and fills in:
 *   - 'options_action': a put_dhcp_opts(...) action with all configured
 *     DHCP options, sorted for deterministic output.
 *   - 'response_action': the action that turns the request into a reply
 *     and sends it back out the ingress port.
 *   - 'ipv4_addr_match': match on the offered source IP and server/broadcast
 *     destination, used by the caller for follow-up flows.
 *
 * Returns false (with nothing appended) when DHCPv4 is not configured for
 * the port, the configured cidr is invalid, 'offer_ip' is outside the cidr,
 * or a required option (server_id, server_mac, lease_time) is missing. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action,
                    struct ds *ipv4_addr_match)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
       /* Either
        *  - cidr defined is invalid or
        *  - the offer ip of the logical port doesn't belong to the cidr
        *    defined in the DHCPv4 options.
        *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");

    if (!(server_ip && server_mac && lease_time)) {
        /* "server_id", "server_mac" and "lease_time" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));

    /* We're not using SMAP_FOR_EACH because we want a consistent order of the
     * options on different architectures (big or little endian, SSE4.2) */
    const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
    for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
        const struct smap_node *node = sorted_opts[i];
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }
    free(sorted_opts);

    /* Replace the trailing ", " with the closing "); next;". */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    ds_put_format(ipv4_addr_match,
                  "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
                  IP_ARGS(offer_ip), server_ip);
    smap_destroy(&dhcpv4_options);
    return true;
}
2756
2757 static bool
2758 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2759 struct ds *options_action, struct ds *response_action)
2760 {
2761 if (!op->nbsp->dhcpv6_options) {
2762 /* CMS has disabled native DHCPv6 for this lport. */
2763 return false;
2764 }
2765
2766 struct in6_addr host_ip, mask;
2767
2768 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2769 &mask);
2770 if (error) {
2771 free(error);
2772 return false;
2773 }
2774 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2775 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2776 if (!ipv6_mask_is_any(&ip6_mask)) {
2777 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2778 * options.*/
2779 return false;
2780 }
2781
2782 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
2783 /* "server_id" should be the MAC address. */
2784 const char *server_mac = smap_get(options_map, "server_id");
2785 struct eth_addr ea;
2786 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2787 /* "server_id" should be present in the dhcpv6_options. */
2788 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2789 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2790 " for lport %s", op->json_key);
2791 return false;
2792 }
2793
2794 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2795 struct in6_addr lla;
2796 in6_generate_lla(ea, &lla);
2797
2798 char server_ip[INET6_ADDRSTRLEN + 1];
2799 ipv6_string_mapped(server_ip, &lla);
2800
2801 char ia_addr[INET6_ADDRSTRLEN + 1];
2802 ipv6_string_mapped(ia_addr, offer_ip);
2803
2804 ds_put_format(options_action,
2805 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2806
2807 /* Check whether the dhcpv6 options should be configured as stateful.
2808 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2809 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
2810 ipv6_string_mapped(ia_addr, offer_ip);
2811 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2812 }
2813
2814 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2815 * options on different architectures (big or little endian, SSE4.2) */
2816 const struct smap_node **sorted_opts = smap_sort(options_map);
2817 for (size_t i = 0; i < smap_count(options_map); i++) {
2818 const struct smap_node *node = sorted_opts[i];
2819 if (strcmp(node->key, "dhcpv6_stateless")) {
2820 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2821 }
2822 }
2823 free(sorted_opts);
2824
2825 ds_chomp(options_action, ' ');
2826 ds_chomp(options_action, ',');
2827 ds_put_cstr(options_action, "); next;");
2828
2829 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2830 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2831 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2832 "output;",
2833 server_mac, server_ip);
2834
2835 return true;
2836 }
2837
/* One logical switch referenced by a port group's member ports. */
struct ovn_port_group_ls {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* nb_ls->header_.uuid. */
    const struct nbrec_logical_switch *nb_ls;
};
2843
/* Internal representation of a northbound Port_Group together with the
 * logical switches its member ports belong to. */
struct ovn_port_group {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* nb_pg->header_.uuid. */
    const struct nbrec_port_group *nb_pg;
    struct hmap nb_lswitches;   /* NB lswitches related to the port group */
    size_t n_acls;              /* Number of ACLs applied to the port group */
    struct nbrec_acl **acls;    /* ACLs applied to the port group */
};
2852
2853 static void
2854 ovn_port_group_ls_add(struct ovn_port_group *pg,
2855 const struct nbrec_logical_switch *nb_ls)
2856 {
2857 struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
2858 pg_ls->key = nb_ls->header_.uuid;
2859 pg_ls->nb_ls = nb_ls;
2860 hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
2861 }
2862
2863 static struct ovn_port_group_ls *
2864 ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
2865 {
2866 struct ovn_port_group_ls *pg_ls;
2867
2868 HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
2869 &pg->nb_lswitches) {
2870 if (uuid_equals(ls_uuid, &pg_ls->key)) {
2871 return pg_ls;
2872 }
2873 }
2874 return NULL;
2875 }
2876
2877 static bool
2878 has_stateful_acl(struct ovn_datapath *od, struct hmap *port_groups)
2879 {
2880 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2881 struct nbrec_acl *acl = od->nbs->acls[i];
2882 if (!strcmp(acl->action, "allow-related")) {
2883 return true;
2884 }
2885 }
2886
2887 struct ovn_port_group *pg;
2888 HMAP_FOR_EACH (pg, key_node, port_groups) {
2889 if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
2890 for (size_t i = 0; i < pg->n_acls; i++) {
2891 struct nbrec_acl *acl = pg->acls[i];
2892 if (!strcmp(acl->action, "allow-related")) {
2893 return true;
2894 }
2895 }
2896 }
2897 }
2898 return false;
2899 }
2900
2901 static void
2902 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
2903 struct hmap *port_groups)
2904 {
2905 bool has_stateful = has_stateful_acl(od, port_groups);
2906
2907 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2908 * allowed by default. */
2909 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
2910 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
2911
2912 /* If there are any stateful ACL rules in this datapath, we must
2913 * send all IP packets through the conntrack action, which handles
2914 * defragmentation, in order to match L4 headers. */
2915 if (has_stateful) {
2916 for (size_t i = 0; i < od->n_router_ports; i++) {
2917 struct ovn_port *op = od->router_ports[i];
2918 /* Can't use ct() for router ports. Consider the
2919 * following configuration: lp1(10.0.0.2) on
2920 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2921 * ping from lp1 to lp2, First, the response will go
2922 * through ct() with a zone for lp2 in the ls2 ingress
2923 * pipeline on hostB. That ct zone knows about this
2924 * connection. Next, it goes through ct() with the zone
2925 * for the router port in the egress pipeline of ls2 on
2926 * hostB. This zone does not know about the connection,
2927 * as the icmp request went through the logical router
2928 * on hostA, not hostB. This would only work with
2929 * distributed conntrack state across all chassis. */
2930 struct ds match_in = DS_EMPTY_INITIALIZER;
2931 struct ds match_out = DS_EMPTY_INITIALIZER;
2932
2933 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
2934 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
2935 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2936 ds_cstr(&match_in), "next;");
2937 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2938 ds_cstr(&match_out), "next;");
2939
2940 ds_destroy(&match_in);
2941 ds_destroy(&match_out);
2942 }
2943 if (od->localnet_port) {
2944 struct ds match_in = DS_EMPTY_INITIALIZER;
2945 struct ds match_out = DS_EMPTY_INITIALIZER;
2946
2947 ds_put_format(&match_in, "ip && inport == %s",
2948 od->localnet_port->json_key);
2949 ds_put_format(&match_out, "ip && outport == %s",
2950 od->localnet_port->json_key);
2951 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2952 ds_cstr(&match_in), "next;");
2953 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2954 ds_cstr(&match_out), "next;");
2955
2956 ds_destroy(&match_in);
2957 ds_destroy(&match_out);
2958 }
2959
2960 /* Ingress and Egress Pre-ACL Table (Priority 110).
2961 *
2962 * Not to do conntrack on ND and ICMP destination
2963 * unreachable packets. */
2964 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2965 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
2966 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
2967 "next;");
2968 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2969 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
2970 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
2971 "next;");
2972
2973 /* Ingress and Egress Pre-ACL Table (Priority 100).
2974 *
2975 * Regardless of whether the ACL is "from-lport" or "to-lport",
2976 * we need rules in both the ingress and egress table, because
2977 * the return traffic needs to be followed.
2978 *
2979 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2980 * it to conntrack for tracking and defragmentation. */
2981 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
2982 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2983 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
2984 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2985 }
2986 }
2987
/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
 * 'ip_address'.  The caller must free() the memory allocated for
 * 'ip_address'.
 *
 * NOTE(review): on a parse failure this returns without touching ANY of
 * the output parameters.  Callers must therefore pre-initialize
 * '*ip_address' to NULL and test it after the call to detect failure
 * (both callers in this file do exactly that); '*port' and
 * '*addr_family' are only meaningful on success. */
static void
ip_address_and_port_from_lb_key(const char *key, char **ip_address,
                                uint16_t *port, int *addr_family)
{
    struct sockaddr_storage ss;
    if (!inet_parse_active(key, 0, &ss)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
                     key);
        return;
    }

    /* Format the address without brackets (even for IPv6) so it can be
     * dropped straight into a logical flow match expression. */
    struct ds s = DS_EMPTY_INITIALIZER;
    ss_format_address_nobracks(&ss, &s);
    *ip_address = ds_steal_cstr(&s);

    *port = ss_get_port(&ss);

    *addr_family = ss.ss_family;
}
3011
3012 /*
3013 * Returns true if logical switch is configured with DNS records, false
3014 * otherwise.
3015 */
3016 static bool
3017 ls_has_dns_records(const struct nbrec_logical_switch *nbs)
3018 {
3019 for (size_t i = 0; i < nbs->n_dns_records; i++) {
3020 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
3021 return true;
3022 }
3023 }
3024
3025 return false;
3026 }
3027
3028 static void
3029 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
3030 {
3031 /* Do not send ND packets to conntrack */
3032 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
3033 "nd || nd_rs || nd_ra", "next;");
3034 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
3035 "nd || nd_rs || nd_ra", "next;");
3036
3037 /* Allow all packets to go to next tables by default. */
3038 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
3039 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
3040
3041 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3042 bool vip_configured = false;
3043 int addr_family = AF_INET;
3044 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3045 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3046 struct smap *vips = &lb->vips;
3047 struct smap_node *node;
3048
3049 SMAP_FOR_EACH (node, vips) {
3050 vip_configured = true;
3051
3052 /* node->key contains IP:port or just IP. */
3053 char *ip_address = NULL;
3054 uint16_t port;
3055 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3056 &addr_family);
3057 if (!ip_address) {
3058 continue;
3059 }
3060
3061 if (!sset_contains(&all_ips, ip_address)) {
3062 sset_add(&all_ips, ip_address);
3063 }
3064
3065 free(ip_address);
3066
3067 /* Ignore L4 port information in the key because fragmented packets
3068 * may not have L4 information. The pre-stateful table will send
3069 * the packet through ct() action to de-fragment. In stateful
3070 * table, we will eventually look at L4 information. */
3071 }
3072 }
3073
3074 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3075 * packet to conntrack for defragmentation. */
3076 const char *ip_address;
3077 SSET_FOR_EACH(ip_address, &all_ips) {
3078 char *match;
3079
3080 if (addr_family == AF_INET) {
3081 match = xasprintf("ip && ip4.dst == %s", ip_address);
3082 } else {
3083 match = xasprintf("ip && ip6.dst == %s", ip_address);
3084 }
3085 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
3086 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3087 free(match);
3088 }
3089
3090 sset_destroy(&all_ips);
3091
3092 if (vip_configured) {
3093 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
3094 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3095 }
3096 }
3097
3098 static void
3099 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
3100 {
3101 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3102 * allowed by default. */
3103 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
3104 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
3105
3106 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3107 * sent to conntrack for tracking and defragmentation. */
3108 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
3109 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3110 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
3111 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3112 }
3113
3114 static void
3115 build_acl_log(struct ds *actions, const struct nbrec_acl *acl)
3116 {
3117 if (!acl->log) {
3118 return;
3119 }
3120
3121 ds_put_cstr(actions, "log(");
3122
3123 if (acl->name) {
3124 ds_put_format(actions, "name=\"%s\", ", acl->name);
3125 }
3126
3127 /* If a severity level isn't specified, default to "info". */
3128 if (acl->severity) {
3129 ds_put_format(actions, "severity=%s, ", acl->severity);
3130 } else {
3131 ds_put_format(actions, "severity=info, ");
3132 }
3133
3134 if (!strcmp(acl->action, "drop")) {
3135 ds_put_cstr(actions, "verdict=drop, ");
3136 } else if (!strcmp(acl->action, "reject")) {
3137 ds_put_cstr(actions, "verdict=reject, ");
3138 } else if (!strcmp(acl->action, "allow")
3139 || !strcmp(acl->action, "allow-related")) {
3140 ds_put_cstr(actions, "verdict=allow, ");
3141 }
3142
3143 ds_chomp(actions, ' ');
3144 ds_chomp(actions, ',');
3145 ds_put_cstr(actions, "); ");
3146 }
3147
/* Emits the logical flows implementing a "reject" ACL 'acl' on datapath
 * 'od' at 'stage': matching TCP traffic is answered with a TCP RST, and
 * any other matching IP traffic with an ICMPv4/ICMPv6 reply.  Four flows
 * are added ({IPv4, IPv6} x {TCP, generic IP}); the TCP flows sit 10
 * priority points higher so they win over the generic IP flows.
 *
 * 'extra_match', if non-empty, is ANDed into every match.
 * 'extra_actions', if non-empty, is prefixed to the actions of the generic
 * IP flows only (callers use it for a ct_commit that marks the connection
 * blocked).  NOTE(review): 'extra_actions' is NOT applied to the TCP/RST
 * flows — confirm this asymmetry is intentional before restructuring. */
static void
build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
                       enum ovn_stage stage, struct nbrec_acl *acl,
                       struct ds *extra_match, struct ds *extra_actions)
{
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;
    bool ingress = (stage == S_SWITCH_IN_ACL);

    /* TCP */
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip4 && tcp && (%s)", acl->match);
    /* Swap L2/L3 addresses and send a RST back toward the sender.  In the
     * egress (to-lport) pipeline the reply must re-enter the ingress
     * pipeline at table 0 to be forwarded; in ingress it is output
     * directly. */
    ds_put_format(&actions, "reg0 = 0; "
                  "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
                  "tcp_reset { outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_clear(&match);
    ds_clear(&actions);
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip6 && tcp && (%s)", acl->match);
    ds_put_format(&actions, "reg0 = 0; "
                  "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
                  "tcp_reset { outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
                  ds_cstr(&match), ds_cstr(&actions));

    /* IP traffic */
    ds_clear(&match);
    ds_clear(&actions);
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip4 && (%s)", acl->match);
    if (extra_actions->length > 0) {
        ds_put_format(&actions, "%s ", extra_actions->string);
    }
    ds_put_format(&actions, "reg0 = 0; "
                  "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
                  "icmp4 { outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_clear(&match);
    ds_clear(&actions);
    build_acl_log(&actions, acl);
    if (extra_match->length > 0) {
        ds_put_format(&match, "(%s) && ", extra_match->string);
    }
    ds_put_format(&match, "ip6 && (%s)", acl->match);
    if (extra_actions->length > 0) {
        ds_put_format(&actions, "%s ", extra_actions->string);
    }
    /* NOTE(review): unlike the icmp4 flow above, the eth/ip swaps here sit
     * inside the icmp6 { } nested-action block — presumably deliberate;
     * confirm against the icmp6 action semantics before "harmonizing". */
    ds_put_format(&actions, "reg0 = 0; icmp6 { "
                  "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
                  "outport <-> inport; %s };",
                  ingress ? "output;" : "next(pipeline=ingress,table=0);");
    ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
                  ds_cstr(&match), ds_cstr(&actions));

    ds_destroy(&match);
    ds_destroy(&actions);
}
3220
/* Emits the ACL-stage logical flows for a single ACL 'acl' on datapath
 * 'od'.  The direction field selects the ingress or egress ACL stage.
 * 'has_stateful' indicates whether ANY ACL on this datapath is
 * "allow-related"; it changes how both "allow" and "drop"/"reject" are
 * implemented, because stateful datapaths must keep conntrack entries
 * consistent with current policy. */
static void
consider_acl(struct hmap *lflows, struct ovn_datapath *od,
             struct nbrec_acl *acl, bool has_stateful)
{
    bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
    enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

    /* The stage hint (first 32 bits of the ACL row's UUID, in hex) ties
     * the generated logical flows back to this ACL for debugging. */
    char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
    if (!strcmp(acl->action, "allow")
        || !strcmp(acl->action, "allow-related")) {
        /* If there are any stateful flows, we must even commit "allow"
         * actions. This is because, while the initiater's
         * direction may not have any stateful rules, the server's
         * may and then its return traffic would not have an
         * associated conntrack entry and would return "+invalid". */
        if (!has_stateful) {
            /* Stateless datapath: a plain "next;" (plus optional log)
             * suffices. */
            struct ds actions = DS_EMPTY_INITIALIZER;
            build_acl_log(&actions, acl);
            ds_put_cstr(&actions, "next;");
            ovn_lflow_add_with_hint(lflows, od, stage,
                                    acl->priority + OVN_ACL_PRI_OFFSET,
                                    acl->match, ds_cstr(&actions),
                                    stage_hint);
            ds_destroy(&actions);
        } else {
            struct ds match = DS_EMPTY_INITIALIZER;
            struct ds actions = DS_EMPTY_INITIALIZER;

            /* Commit the connection tracking entry if it's a new
             * connection that matches this ACL. After this commit,
             * the reply traffic is allowed by a flow we create at
             * priority 65535, defined earlier.
             *
             * It's also possible that a known connection was marked for
             * deletion after a policy was deleted, but the policy was
             * re-added while that connection is still known. We catch
             * that case here and un-set ct_label.blocked (which will be done
             * by ct_commit in the "stateful" stage) to indicate that the
             * connection should be allowed to resume.
             */
            ds_put_format(&match, "((ct.new && !ct.est)"
                          " || (!ct.new && ct.est && !ct.rpl "
                          "&& ct_label.blocked == 1)) "
                          "&& (%s)", acl->match);
            ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
            build_acl_log(&actions, acl);
            ds_put_cstr(&actions, "next;");
            ovn_lflow_add_with_hint(lflows, od, stage,
                                    acl->priority + OVN_ACL_PRI_OFFSET,
                                    ds_cstr(&match),
                                    ds_cstr(&actions),
                                    stage_hint);

            /* Match on traffic in the request direction for an established
             * connection tracking entry that has not been marked for
             * deletion. There is no need to commit here, so we can just
             * proceed to the next table. We use this to ensure that this
             * connection is still allowed by the currently defined
             * policy. */
            ds_clear(&match);
            ds_clear(&actions);
            ds_put_format(&match,
                          "!ct.new && ct.est && !ct.rpl"
                          " && ct_label.blocked == 0 && (%s)",
                          acl->match);

            build_acl_log(&actions, acl);
            ds_put_cstr(&actions, "next;");
            ovn_lflow_add_with_hint(lflows, od, stage,
                                    acl->priority + OVN_ACL_PRI_OFFSET,
                                    ds_cstr(&match), ds_cstr(&actions),
                                    stage_hint);

            ds_destroy(&match);
            ds_destroy(&actions);
        }
    } else if (!strcmp(acl->action, "drop")
               || !strcmp(acl->action, "reject")) {
        struct ds match = DS_EMPTY_INITIALIZER;
        struct ds actions = DS_EMPTY_INITIALIZER;

        /* The implementation of "drop" differs if stateful ACLs are in
         * use for this datapath. In that case, the actions differ
         * depending on whether the connection was previously committed
         * to the connection tracker with ct_commit. */
        if (has_stateful) {
            /* If the packet is not part of an established connection, then
             * we can simply reject/drop it. */
            ds_put_cstr(&match,
                        "(!ct.est || (ct.est && ct_label.blocked == 1))");
            if (!strcmp(acl->action, "reject")) {
                build_reject_acl_rules(od, lflows, stage, acl, &match,
                                       &actions);
            } else {
                /* "/* drop */" below is emitted INTO the action string as
                 * a placeholder; a flow with no terminal action drops. */
                ds_put_format(&match, " && (%s)", acl->match);
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "/* drop */");
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), ds_cstr(&actions));
            }
            /* For an existing connection without ct_label set, we've
             * encountered a policy change. ACLs previously allowed
             * this connection and we committed the connection tracking
             * entry. Current policy says that we should drop this
             * connection. First, we set bit 0 of ct_label to indicate
             * that this connection is set for deletion. By not
             * specifying "next;", we implicitly drop the packet after
             * updating conntrack state. We would normally defer
             * ct_commit() to the "stateful" stage, but since we're
             * rejecting/dropping the packet, we go ahead and do it here.
             */
            ds_clear(&match);
            ds_clear(&actions);
            ds_put_cstr(&match, "ct.est && ct_label.blocked == 0");
            ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
            if (!strcmp(acl->action, "reject")) {
                build_reject_acl_rules(od, lflows, stage, acl, &match,
                                       &actions);
            } else {
                ds_put_format(&match, " && (%s)", acl->match);
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "/* drop */");
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), ds_cstr(&actions));
            }
        } else {
            /* There are no stateful ACLs in use on this datapath,
             * so a "reject/drop" ACL is simply the "reject/drop"
             * logical flow action in all cases. */
            if (!strcmp(acl->action, "reject")) {
                build_reject_acl_rules(od, lflows, stage, acl, &match,
                                       &actions);
            } else {
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "/* drop */");
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, ds_cstr(&actions));
            }
        }
        ds_destroy(&match);
        ds_destroy(&actions);
    }
    free(stage_hint);
}
3368
3369 static struct ovn_port_group *
3370 ovn_port_group_create(struct hmap *pgs,
3371 const struct nbrec_port_group *nb_pg)
3372 {
3373 struct ovn_port_group *pg = xzalloc(sizeof *pg);
3374 pg->key = nb_pg->header_.uuid;
3375 pg->nb_pg = nb_pg;
3376 pg->n_acls = nb_pg->n_acls;
3377 pg->acls = nb_pg->acls;
3378 hmap_init(&pg->nb_lswitches);
3379 hmap_insert(pgs, &pg->key_node, uuid_hash(&pg->key));
3380 return pg;
3381 }
3382
3383 static void
3384 ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
3385 {
3386 if (pg) {
3387 hmap_remove(pgs, &pg->key_node);
3388 struct ovn_port_group_ls *ls;
3389 HMAP_FOR_EACH_POP (ls, key_node, &pg->nb_lswitches) {
3390 free(ls);
3391 }
3392 hmap_destroy(&pg->nb_lswitches);
3393 free(pg);
3394 }
3395 }
3396
3397 static void
3398 build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
3399 struct hmap *ports)
3400 {
3401 hmap_init(pgs);
3402
3403 const struct nbrec_port_group *nb_pg;
3404 NBREC_PORT_GROUP_FOR_EACH (nb_pg, ctx->ovnnb_idl) {
3405 struct ovn_port_group *pg = ovn_port_group_create(pgs, nb_pg);
3406 for (size_t i = 0; i < nb_pg->n_ports; i++) {
3407 struct ovn_port *op = ovn_port_find(ports, nb_pg->ports[i]->name);
3408 if (!op) {
3409 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3410 VLOG_ERR_RL(&rl, "lport %s in port group %s not found.",
3411 nb_pg->ports[i]->name,
3412 nb_pg->name);
3413 continue;
3414 }
3415
3416 if (!op->od->nbs) {
3417 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3418 VLOG_WARN_RL(&rl, "lport %s in port group %s has no lswitch.",
3419 nb_pg->ports[i]->name,
3420 nb_pg->name);
3421 continue;
3422 }
3423
3424 struct ovn_port_group_ls *pg_ls =
3425 ovn_port_group_ls_find(pg, &op->od->nbs->header_.uuid);
3426 if (!pg_ls) {
3427 ovn_port_group_ls_add(pg, op->od->nbs);
3428 }
3429 }
3430 }
3431 }
3432
/* Builds the ingress and egress ACL stages for logical switch datapath
 * 'od': the baseline allow/commit/invalid-drop conntrack scaffolding
 * (when any stateful ACL exists), the flows for each ACL (direct and via
 * port groups in 'port_groups'), and priority-34000 flows that let
 * DHCPv4/DHCPv6 and DNS replies from ovn-controller reach the ports. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows,
           struct hmap *port_groups)
{
    bool has_stateful = has_stateful_acl(od, port_groups);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiater's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;"
         *
         * We also check for established connections that have ct_label.blocked
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label.blocked=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port.  */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        consider_acl(lflows, od, acl, has_stateful);
    }
    /* ACLs attached via port groups that include ports on this switch. */
    struct ovn_port_group *pg;
    HMAP_FOR_EACH (pg, key_node, port_groups) {
        if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
            for (size_t i = 0; i < pg->n_acls; i++) {
                consider_acl(lflows, od, pg->acls[i], has_stateful);
            }
        }
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4 options.
     * */
    for (size_t i = 0; i < od->nbs->n_ports; i++) {
        if (od->nbs->ports[i]->dhcpv4_options) {
            const char *server_id = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
            const char *lease_time = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
            if (server_id && server_mac && lease_time) {
                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions =
                    has_stateful ? "ct_commit; next;" : "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip4.src == %s && udp && udp.src == 67 "
                              "&& udp.dst == 68", od->nbs->ports[i]->name,
                              server_mac, server_id);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }

        if (od->nbs->ports[i]->dhcpv6_options) {
            /* NOTE(review): for DHCPv6 the server MAC is read from the
             * "server_id" option key (not "server_mac") — presumably the
             * DHCPv6 server identifier carries the MAC here; confirm
             * against the DHCPv6 options schema before changing. */
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
            struct eth_addr ea;
            if (server_mac && eth_addr_from_string(server_mac, &ea)) {
                /* Get the link local IP of the DHCPv6 server from the
                 * server MAC. */
                struct in6_addr lla;
                in6_generate_lla(ea, &lla);

                char server_ip[INET6_ADDRSTRLEN + 1];
                ipv6_string_mapped(server_ip, &lla);

                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions = has_stateful ? "ct_commit; next;" :
                    "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip6.src == %s && udp && udp.src == 547 "
                              "&& udp.dst == 546", od->nbs->ports[i]->name,
                              server_mac, server_ip);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }
    }

    /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
     * if the CMS has configured DNS records for the datapath.
     */
    if (ls_has_dns_records(od->nbs)) {
        const char *actions = has_stateful ? "ct_commit; next;" : "next;";
        ovn_lflow_add(
            lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
            actions);
    }
}
3611
3612 static void
3613 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3614 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3615 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3616 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_METER, 0, "1", "next;");
3617 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_METER, 0, "1", "next;");
3618
3619 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3620 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3621 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3622 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3623 int64_t rate = 0;
3624 int64_t burst = 0;
3625
3626 for (size_t j = 0; j < qos->n_action; j++) {
3627 if (!strcmp(qos->key_action[j], "dscp")) {
3628 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3629
3630 ds_put_format(&dscp_action, "ip.dscp = %"PRId64"; next;",
3631 qos->value_action[j]);
3632 ovn_lflow_add(lflows, od, stage,
3633 qos->priority,
3634 qos->match, ds_cstr(&dscp_action));
3635 ds_destroy(&dscp_action);
3636 }
3637 }
3638
3639 for (size_t n = 0; n < qos->n_bandwidth; n++) {
3640 if (!strcmp(qos->key_bandwidth[n], "rate")) {
3641 rate = qos->value_bandwidth[n];
3642 } else if (!strcmp(qos->key_bandwidth[n], "burst")) {
3643 burst = qos->value_bandwidth[n];
3644 }
3645 }
3646 if (rate) {
3647 struct ds meter_action = DS_EMPTY_INITIALIZER;
3648 stage = ingress ? S_SWITCH_IN_QOS_METER : S_SWITCH_OUT_QOS_METER;
3649 if (burst) {
3650 ds_put_format(&meter_action,
3651 "set_meter(%"PRId64", %"PRId64"); next;",
3652 rate, burst);
3653 } else {
3654 ds_put_format(&meter_action,
3655 "set_meter(%"PRId64"); next;",
3656 rate);
3657 }
3658
3659 /* Ingress and Egress QoS Meter Table.
3660 *
3661 * We limit the bandwidth of this flow by adding a meter table.
3662 */
3663 ovn_lflow_add(lflows, od, stage,
3664 qos->priority,
3665 qos->match, ds_cstr(&meter_action));
3666 ds_destroy(&meter_action);
3667 }
3668 }
3669 }
3670
3671 static void
3672 build_lb(struct ovn_datapath *od, struct hmap *lflows)
3673 {
3674 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3675 * default. */
3676 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3677 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3678
3679 if (od->nbs->load_balancer) {
3680 /* Ingress and Egress LB Table (Priority 65535).
3681 *
3682 * Send established traffic through conntrack for just NAT. */
3683 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3684 "ct.est && !ct.rel && !ct.new && !ct.inv",
3685 REGBIT_CONNTRACK_NAT" = 1; next;");
3686 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3687 "ct.est && !ct.rel && !ct.new && !ct.inv",
3688 REGBIT_CONNTRACK_NAT" = 1; next;");
3689 }
3690 }
3691
3692 static void
3693 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3694 {
3695 /* Ingress and Egress stateful Table (Priority 0): Packets are
3696 * allowed by default. */
3697 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3698 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3699
3700 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3701 * committed to conntrack. We always set ct_label.blocked to 0 here as
3702 * any packet that makes it this far is part of a connection we
3703 * want to allow to continue. */
3704 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3705 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3706 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3707 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3708
3709 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3710 * through nat (without committing).
3711 *
3712 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3713 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3714 * don't overlap.
3715 */
3716 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3717 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3718 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3719 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3720
3721 /* Load balancing rules for new connections get committed to conntrack
3722 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3723 * a higher priority rule for load balancing below also commits the
3724 * connection, so it is okay if we do not hit the above match on
3725 * REGBIT_CONNTRACK_COMMIT. */
3726 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3727 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3728 struct smap *vips = &lb->vips;
3729 struct smap_node *node;
3730
3731 SMAP_FOR_EACH (node, vips) {
3732 uint16_t port = 0;
3733 int addr_family;
3734
3735 /* node->key contains IP:port or just IP. */
3736 char *ip_address = NULL;
3737 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3738 &addr_family);
3739 if (!ip_address) {
3740 continue;
3741 }
3742
3743 /* New connections in Ingress table. */
3744 char *action = xasprintf("ct_lb(%s);", node->value);
3745 struct ds match = DS_EMPTY_INITIALIZER;
3746 if (addr_family == AF_INET) {
3747 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
3748 } else {
3749 ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
3750 }
3751 if (port) {
3752 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
3753 ds_put_format(&match, " && udp.dst == %d", port);
3754 } else {
3755 ds_put_format(&match, " && tcp.dst == %d", port);
3756 }
3757 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3758 120, ds_cstr(&match), action);
3759 } else {
3760 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3761 110, ds_cstr(&match), action);
3762 }
3763
3764 free(ip_address);
3765 ds_destroy(&match);
3766 free(action);
3767 }
3768 }
3769 }
3770
/* Populates 'lflows' with the ingress and egress logical flows, and
 * 'mcgroups' with the multicast groups, for every logical switch datapath
 * in 'datapaths' and every logical switch port in 'ports'.
 * 'port_groups' is consulted when building pre-ACL/ACL stages. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *port_groups, struct hmap *lflows,
                    struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Scratch buffers reused (via ds_clear()) across all the loops below;
     * destroyed once at the end of the function. */
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 through 10. Egress tables 0 through 7. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            /* Not a logical switch datapath (e.g. a router); skip. */
            continue;
        }

        build_pre_acls(od, lflows, port_groups);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows, port_groups);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     *  (priority 50).
     *  Ingress table 1: Ingress port security - IP (priority 90 and 80)
     *  Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            /* Not a logical switch port (e.g. a router port); skip. */
            continue;
        }

        if (!lsp_is_enabled(op->nbsp)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        ds_clear(&match);
        ds_clear(&actions);
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        /* If the CMS assigned a QoS queue to this port, set it before
         * advancing to the next table. */
        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table 11: ARP/ND responder, skip requests coming from localnet
     * and vtep ports. (priority 100); see ovn-northd.8.xml for the
     * rationale. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if ((!strcmp(op->nbsp->type, "localnet")) ||
            (!strcmp(op->nbsp->type, "vtep"))) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    /* Ingress table 11: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router or
         *  - port type is localport
         */
        if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
            strcmp(op->nbsp->type, "localport")) {
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* IPv4: respond to ARP requests (arp.op == 1) for each of the
             * port's addresses with a synthesized ARP reply sent back out
             * the ingress port. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to an ARP request from the port that owns the
                 * address (otherwise a DHCP client that ARPs to check for a
                 * duplicate address will fail).  Instead, forward it the
                 * usual way.
                 *
                 * (Another alternative would be to simply drop the packet.  If
                 * everything is working as it is configured, then this would
                 * produce equivalent results, since no one should reply to the
                 * request.  But ARPing for one's own IP address is intended to
                 * detect situations where the network is not working as
                 * configured, so dropping the request would frustrate that
                 * intent.) */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }

            /* For ND solicitations, we need to listen for both the
             * unicast IPv6 address and its all-nodes multicast address,
             * but always respond with the unicast IPv6 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match,
                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);

                ds_clear(&actions);
                ds_put_format(&actions,
                        "nd_na { "
                        "eth.src = %s; "
                        "ip6.src = %s; "
                        "nd.target = %s; "
                        "nd.tll = %s; "
                        "outport = inport; "
                        "flags.loopback = 1; "
                        "output; "
                        "};",
                        op->lsp_addrs[i].ea_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ea_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to a solicitation from the port that owns the
                 * address (otherwise DAD detection will fail). */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    /* Ingress table 11: ARP/ND responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Logical switch ingress table 12 and 13: DHCP options and response
     * priority 100 flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
           continue;
        }

        if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
            /* Don't add the DHCP flows if the port is not enabled or if the
             * port is a router port. */
            continue;
        }

        if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
            /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
             */
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Only the first IPv4 (and first IPv6) address that yields a
             * valid DHCP action is used -- note the 'break' after a
             * successful build_dhcpv4_action()/build_dhcpv6_action(). */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action, &ipv4_addr_match)) {
                    ds_clear(&match);
                    /* DHCPDISCOVER/initial DHCPREQUEST: client has no
                     * address yet, so it sends from 0.0.0.0 to broadcast. */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);
                    /* Allow ip4.src = OFFER_IP and
                     * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
                     * cases
                     *  -  When the client wants to renew the IP by sending
                     *     the DHCPREQUEST to the server ip.
                     *  -  When the client wants to renew the IP by
                     *     broadcasting the DHCPREQUEST.
                     */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "%s && udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);

                    /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
                     * put_dhcp_opts action  is successful. */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4 && udp.src == 68 && udp.dst == 67"
                        " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
                        op->lsp_addrs[i].ea_s);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    ds_destroy(&ipv4_addr_match);
                    break;
                }
            }

            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv6_action(
                        op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
                        &options_action, &response_action)) {
                    ds_clear(&match);
                    /* DHCPv6 clients send to the well-known
                     * All_DHCP_Relay_Agents_and_Servers multicast address
                     * (ff02::1:2), UDP 546 -> 547. */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s"
                        " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
                        " udp.dst == 547", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
                                  ds_cstr(&match), ds_cstr(&options_action));

                    /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
                     * put_dhcpv6_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
                                  ds_cstr(&match), ds_cstr(&response_action));
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }
        }
    }

    /* Logical switch ingress table 14 and 15: DNS lookup and response
     * priority 100 flows.
     */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs || !ls_has_dns_records(od->nbs)) {
           continue;
        }

        struct ds action = DS_EMPTY_INITIALIZER;

        ds_clear(&match);
        ds_put_cstr(&match, "udp.dst == 53");
        ds_put_format(&action,
                      REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_clear(&action);
        /* On a successful lookup, swap addresses/ports and send the DNS
         * response back out the ingress port.  The same match is reused for
         * the IPv4 and IPv6 response flows below. */
        ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
        ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
                      "udp.dst = udp.src; udp.src = 53; outport = inport; "
                      "flags.loopback = 1; output;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_clear(&action);
        ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
                      "udp.dst = udp.src; udp.src = 53; outport = inport; "
                      "flags.loopback = 1; output;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_destroy(&action);
    }

    /* Ingress table 12 and 13: DHCP options and response, by default goto
     * next. (priority 0).
     * Ingress table 14 and 15: DNS lookup and response, by default goto next.
     * (priority 0).*/

    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
    }

    /* Ingress table 16: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 16: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            /* Addresses are owned by the logical port.
             * Ethernet address followed by zero or more IPv4
             * or IPv6 addresses (or both). */
            struct eth_addr mac;
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                /* "dynamic" address: use the MAC that ovn-northd itself
                 * allocated into dynamic_addresses, if present. */
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "router")) {
                /* "router" address: forward traffic for the peer router
                 * port's MAC to this switch port. */
                if (!op->peer || !op->peer->nbrp
                    || !ovs_scan(op->peer->nbrp->mac,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port
                    && op->peer->od->l3redirect_port) {
                    /* The destination lookup flow for the router's
                     * distributed gateway port MAC address should only be
                     * programmed on the "redirect-chassis". */
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  op->peer->od->l3redirect_port->json_key);
                }

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Add ethernet addresses specified in NAT rules on
                 * distributed logical routers. */
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port) {
                    for (int j = 0; j < op->peer->od->nbr->n_nat; j++) {
                        const struct nbrec_nat *nat
                                                  = op->peer->od->nbr->nat[j];
                        if (!strcmp(nat->type, "dnat_and_snat")
                            && nat->logical_port && nat->external_mac
                            && eth_addr_from_string(nat->external_mac, &mac)) {

                            ds_clear(&match);
                            ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
                                          " && is_chassis_resident(\"%s\")",
                                          ETH_ADDR_ARGS(mac),
                                          nat->logical_port);

                            ds_clear(&actions);
                            ds_put_format(&actions, "outport = %s; output;",
                                          op->json_key);
                            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
                                          50, ds_cstr(&match),
                                          ds_cstr(&actions));
                        }
                    }
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    /* Ingress table 16: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress table 8: Egress port security - IP (priority 0)
     * Egress table 9: Egress port security L2 - multicast/broadcast
     *                 (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 8: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 9: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }

    ds_destroy(&match);
    ds_destroy(&actions);
}
4317
4318 static bool
4319 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
4320 {
4321 return !lrport->enabled || *lrport->enabled;
4322 }
4323
/* Returns a string of the IP address of the router port 'op' that
 * overlaps with 'ip_s".  If one is not found, returns NULL.
 *
 * Whether 'ip_s' is treated as IPv4 or IPv6 is decided purely by the
 * presence of a '.' character in the string.
 *
 * The caller must not free the returned string. */
static const char *
find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
{
    bool is_ipv4 = strchr(ip_s, '.') ? true : false;

    if (is_ipv4) {
        ovs_be32 ip;

        if (!ip_parse(ip_s, &ip)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
            return NULL;
        }

        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];

            /* 'ip' is in this network iff it agrees with the network
             * address on every masked bit. */
            if (!((na->network ^ ip) & na->mask)) {
                /* There should be only 1 interface that matches the
                 * supplied IP.  Otherwise, it's a configuration error,
                 * because subnets of a router's interfaces should NOT
                 * overlap. */
                return na->addr_s;
            }
        }
    } else {
        struct in6_addr ip6;

        if (!ipv6_parse(ip_s, &ip6)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
            return NULL;
        }

        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
            const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
            /* IPv6 equivalent of the masked comparison above:
             * ((network ^ ip6) & mask) == 0. */
            struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
            struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);

            if (ipv6_is_zero(&and_addr)) {
                /* There should be only 1 interface that matches the
                 * supplied IP.  Otherwise, it's a configuration error,
                 * because subnets of a router's interfaces should NOT
                 * overlap. */
                return na->addr_s;
            }
        }
    }

    return NULL;
}
4379
/* Adds a logical flow to the router ingress IP routing stage that routes
 * traffic matching 'network_s'/'plen' (dst-ip routes) or sourced from it
 * (src-ip routes) out of logical router port 'op', rewriting eth.src and
 * setting the next-hop ((xx)reg0) and outgoing source address ((xx)reg1).
 *
 * 'lrp_addr_s' is the router port IP used as the src-address register value.
 * 'gateway', if nonnull, is the next-hop IP; otherwise the packet's own
 * destination is used (directly-connected route).
 * 'policy' selects "src-ip" routing when it equals that string; anything
 * else (including NULL) means dst-ip routing. */
static void
add_route(struct hmap *lflows, const struct ovn_port *op,
          const char *lrp_addr_s, const char *network_s, int plen,
          const char *gateway, const char *policy)
{
    /* IPv4 vs IPv6 is inferred from the textual form of the prefix. */
    bool is_ipv4 = strchr(network_s, '.') ? true : false;
    struct ds match = DS_EMPTY_INITIALIZER;
    const char *dir;
    uint16_t priority;

    /* Longest-prefix-match: priority grows with prefix length.  dst-ip
     * routes get the odd (higher) priority so that at equal prefix length
     * they win over src-ip routes. */
    if (policy && !strcmp(policy, "src-ip")) {
        dir = "src";
        priority = plen * 2;
    } else {
        dir = "dst";
        priority = (plen * 2) + 1;
    }

    /* IPv6 link-local addresses must be scoped to the local router port. */
    if (!is_ipv4) {
        struct in6_addr network;
        ovs_assert(ipv6_parse(network_s, &network));
        if (in6_is_lla(&network)) {
            ds_put_format(&match, "inport == %s && ", op->json_key);
        }
    }
    ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
                  network_s, plen);

    struct ds actions = DS_EMPTY_INITIALIZER;
    /* IPv6 uses the 128-bit "xxreg" registers; IPv4 the 32-bit "reg". */
    ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");

    if (gateway) {
        ds_put_cstr(&actions, gateway);
    } else {
        ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
    }
    ds_put_format(&actions, "; "
                  "%sreg1 = %s; "
                  "eth.src = %s; "
                  "outport = %s; "
                  "flags.loopback = 1; "
                  "next;",
                  is_ipv4 ? "" : "xx",
                  lrp_addr_s,
                  op->lrp_networks.ea_s,
                  op->json_key);

    /* The priority here is calculated to implement longest-prefix-match
     * routing. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_destroy(&match);
    ds_destroy(&actions);
}
4435
/* Translates one northbound static 'route' on logical router 'od' into a
 * routing flow via add_route().
 *
 * Validation performed, with a rate-limited warning and no flow on failure:
 *   - the next hop must be a host address (/32 for IPv4, /128 for IPv6);
 *   - the ip_prefix must parse in the same address family as the next hop;
 *   - an output port, explicit or inferred from the next hop, must exist. */
static void
build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
                        struct hmap *ports,
                        const struct nbrec_logical_router_static_route *route)
{
    ovs_be32 nexthop;
    const char *lrp_addr_s = NULL;
    unsigned int plen;
    bool is_ipv4;

    /* Verify that the next hop is an IP address with an all-ones mask. */
    char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
    if (!error) {
        if (plen != 32) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
            return;
        }
        is_ipv4 = true;
    } else {
        /* Not IPv4; try IPv6 before giving up. */
        free(error);

        struct in6_addr ip6;
        error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
        if (!error) {
            if (plen != 128) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
                return;
            }
            is_ipv4 = false;
        } else {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
            free(error);
            return;
        }
    }

    /* Parse the destination prefix in the next hop's address family and
     * canonicalize it to its network address ('plen' is reused for the
     * prefix length from here on). */
    char *prefix_s;
    if (is_ipv4) {
        ovs_be32 prefix;
        /* Verify that ip prefix is a valid IPv4 address. */
        error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
        if (error) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
                         route->ip_prefix);
            free(error);
            return;
        }
        prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
    } else {
        /* Verify that ip prefix is a valid IPv6 address. */
        struct in6_addr prefix;
        error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
        if (error) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
                         route->ip_prefix);
            free(error);
            return;
        }
        struct in6_addr mask = ipv6_create_mask(plen);
        struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
        prefix_s = xmalloc(INET6_ADDRSTRLEN);
        inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
    }

    /* Find the outgoing port. */
    struct ovn_port *out_port = NULL;
    if (route->output_port) {
        out_port = ovn_port_find(ports, route->output_port);
        if (!out_port) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
                         route->output_port, route->ip_prefix);
            goto free_prefix_s;
        }
        lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
        if (!lrp_addr_s) {
            /* There are no IP networks configured on the router's port via
             * which 'route->nexthop' is theoretically reachable.  But since
             * 'out_port' has been specified, we honor it by trying to reach
             * 'route->nexthop' via the first IP address of 'out_port'.
             * (There are cases, e.g in GCE, where each VM gets a /32 IP
             * address and the default gateway is still reachable from it.) */
            if (is_ipv4) {
                if (out_port->lrp_networks.n_ipv4_addrs) {
                    lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
                }
            } else {
                if (out_port->lrp_networks.n_ipv6_addrs) {
                    lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
                }
            }
        }
    } else {
        /* output_port is not specified, find the
         * router port matching the next hop. */
        int i;
        for (i = 0; i < od->nbr->n_ports; i++) {
            struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
            out_port = ovn_port_find(ports, lrp->name);
            if (!out_port) {
                /* This should not happen. */
                continue;
            }

            lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
            if (lrp_addr_s) {
                break;
            }
        }
    }

    if (!out_port || !lrp_addr_s) {
        /* There is no matched out port. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
                     route->ip_prefix, route->nexthop);
        goto free_prefix_s;
    }

    /* An unset policy defaults to destination-based routing. */
    char *policy = route->policy ? route->policy : "dst-ip";
    add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
              policy);

free_prefix_s:
    free(prefix_s);
}
4567
4568 static void
4569 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4570 {
4571 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
4572 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
4573 return;
4574 }
4575
4576 ds_put_cstr(ds, "{");
4577 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4578 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
4579 if (add_bcast) {
4580 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4581 }
4582 }
4583 ds_chomp(ds, ' ');
4584 ds_chomp(ds, ',');
4585 ds_put_cstr(ds, "}");
4586 }
4587
4588 static void
4589 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4590 {
4591 if (op->lrp_networks.n_ipv6_addrs == 1) {
4592 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4593 return;
4594 }
4595
4596 ds_put_cstr(ds, "{");
4597 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4598 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4599 }
4600 ds_chomp(ds, ' ');
4601 ds_chomp(ds, ',');
4602 ds_put_cstr(ds, "}");
4603 }
4604
/* Looks up the "<key_type>_force_snat_ip" option on logical router 'od'
 * (e.g. key_type "lb" or "dnat") and, if it parses as a plain IPv4 address
 * (no mask or a /32 mask), stores the parsed address in '*ip' and returns
 * the option's string value.
 *
 * On a missing or malformed option, '*ip' is set to 0 and NULL is returned
 * (a rate-limited warning is logged for the malformed case).
 *
 * The returned string is owned by the options smap; the caller must not
 * free it, and it is only valid while 'od->nbr' is. */
static const char *
get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
{
    char *key = xasprintf("%s_force_snat_ip", key_type);
    const char *ip_address = smap_get(&od->nbr->options, key);
    free(key);

    if (ip_address) {
        ovs_be32 mask;
        char *error = ip_parse_masked(ip_address, ip, &mask);
        /* Only a host address is acceptable here; reject any value that
         * carries a non-/32 mask. */
        if (error || mask != OVS_BE32_MAX) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
                         ip_address, UUID_ARGS(&od->key));
            free(error);
            *ip = 0;
            return NULL;
        }
        return ip_address;
    }

    *ip = 0;
    return NULL;
}
4629
/* Adds the logical router load-balancer flows for one VIP:
 *
 *   - In S_ROUTER_IN_DNAT, a flow for new connections ('match' + 'actions',
 *     which are expected to perform the ct_lb selection) and a flow for
 *     established connections that just runs ct_dnat.
 *   - If 'lb_force_snat_ip' is nonnull, both flows additionally set
 *     flags.force_snat_for_lb so later stages SNAT the traffic.
 *   - On a router with a distributed gateway port, an S_ROUTER_OUT_UNDNAT
 *     flow (priority 120) that un-DNATs return traffic from the backends
 *     listed in 'backend_ips' (a comma-separated "ip[:port]" list), scoped
 *     to the redirect chassis.
 *
 * 'addr_family' (AF_INET/AF_INET6) selects the ip4/ip6 prefix of the
 * undnat match; 'is_udp' selects udp vs tcp for per-port backend matches. */
static void
add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
                   struct ds *match, struct ds *actions, int priority,
                   const char *lb_force_snat_ip, char *backend_ips,
                   bool is_udp, int addr_family)
{
    /* A match and actions for new connections. */
    char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
    if (lb_force_snat_ip) {
        char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
                                      ds_cstr(actions));
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
                      new_actions);
        free(new_actions);
    } else {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
                      ds_cstr(actions));
    }

    /* A match and actions for established connections. */
    char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
    if (lb_force_snat_ip) {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
                      "flags.force_snat_for_lb = 1; ct_dnat;");
    } else {
        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
                      "ct_dnat;");
    }

    free(new_match);
    free(est_match);

    /* The undnat flow below only applies to routers with a distributed
     * gateway port and a redirect port, and needs backend IPs to match. */
    if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips) {
        return;
    }

    /* Add logical flows to UNDNAT the load balanced reverse traffic in
     * the router egress pipeline stage - S_ROUTER_OUT_UNDNAT if the logical
     * router has a gateway router port associated.
     */
    struct ds undnat_match = DS_EMPTY_INITIALIZER;
    if (addr_family == AF_INET) {
        ds_put_cstr(&undnat_match, "ip4 && (");
    } else {
        ds_put_cstr(&undnat_match, "ip6 && (");
    }
    /* Walk the comma-separated backend list with strsep(), building
     * "(ipN.src == IP [&& L4.src == PORT]) || " terms.  'start' keeps the
     * original allocation for free(); strsep() advances 'next'. */
    char *start, *next, *ip_str;
    start = next = xstrdup(backend_ips);
    ip_str = strsep(&next, ",");
    bool backend_ips_found = false;
    while (ip_str && ip_str[0]) {
        char *ip_address = NULL;
        uint16_t port = 0;
        int addr_family_;
        ip_address_and_port_from_lb_key(ip_str, &ip_address, &port,
                                        &addr_family_);
        if (!ip_address) {
            /* Unparseable entry: stop consuming the list. */
            break;
        }

        if (addr_family_ == AF_INET) {
            ds_put_format(&undnat_match, "(ip4.src == %s", ip_address);
        } else {
            ds_put_format(&undnat_match, "(ip6.src == %s", ip_address);
        }
        free(ip_address);
        if (port) {
            ds_put_format(&undnat_match, " && %s.src == %d) || ",
                          is_udp ? "udp" : "tcp", port);
        } else {
            ds_put_cstr(&undnat_match, ") || ");
        }
        ip_str = strsep(&next, ",");
        backend_ips_found = true;
    }

    free(start);
    if (!backend_ips_found) {
        ds_destroy(&undnat_match);
        return;
    }
    /* Remove the trailing " || " left by the loop above. */
    ds_chomp(&undnat_match, ' ');
    ds_chomp(&undnat_match, '|');
    ds_chomp(&undnat_match, '|');
    ds_chomp(&undnat_match, ' ');
    ds_put_format(&undnat_match, ") && outport == %s && "
                  "is_chassis_resident(%s)", od->l3dgw_port->json_key,
                  od->l3redirect_port->json_key);
    if (lb_force_snat_ip) {
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
                      ds_cstr(&undnat_match),
                      "flags.force_snat_for_lb = 1; ct_dnat;");
    } else {
        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
                      ds_cstr(&undnat_match), "ct_dnat;");
    }

    ds_destroy(&undnat_match);
}
4729
4730 #define ND_RA_MAX_INTERVAL_MAX 1800
4731 #define ND_RA_MAX_INTERVAL_MIN 4
4732
4733 #define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
4734 #define ND_RA_MIN_INTERVAL_MIN 3
4735
/* Copies the IPv6 Router Advertisement configuration for router port 'op'
 * into the southbound Port_Binding options so that ovn-controller can send
 * periodic RAs.  'address_mode' is the configured ipv6_ra address mode
 * string, stored verbatim.
 *
 * max_interval and min_interval are clamped to the ND_RA_*_INTERVAL_*
 * bounds defined above before being copied. */
static void
copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
{
    struct smap options;
    smap_clone(&options, &op->sb->options);

    smap_add(&options, "ipv6_ra_send_periodic", "true");
    smap_add(&options, "ipv6_ra_address_mode", address_mode);

    int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
            "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
    if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
        max_interval = ND_RA_MAX_INTERVAL_MAX;
    }
    if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
        max_interval = ND_RA_MAX_INTERVAL_MIN;
    }
    smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval);

    /* The default (and the upper clamp) for min_interval is derived from
     * the clamped max_interval. */
    int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
            "min_interval", nd_ra_min_interval_default(max_interval));
    if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
        min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
    }
    if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
        min_interval = ND_RA_MIN_INTERVAL_MIN;
    }
    smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval);

    int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", ND_MTU_DEFAULT);
    /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */
    if (mtu && mtu >= 1280) {
        smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
    }

    /* Advertise each non-link-local network as a prefix; a link-local
     * address instead becomes the RA source address. */
    struct ds s = DS_EMPTY_INITIALIZER;
    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
        struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
        if (in6_is_lla(&addrs->network)) {
            smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
            continue;
        }
        ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
    }
    /* Remove trailing space */
    ds_chomp(&s, ' ');
    smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
    ds_destroy(&s);

    smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);

    sbrec_port_binding_set_options(op->sb, &options);
    smap_destroy(&options);
}
4790
4791 static void
4792 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
4793 struct hmap *lflows)
4794 {
4795 /* This flow table structure is documented in ovn-northd(8), so please
4796 * update ovn-northd.8.xml if you change anything. */
4797
4798 struct ds match = DS_EMPTY_INITIALIZER;
4799 struct ds actions = DS_EMPTY_INITIALIZER;
4800
4801 /* Logical router ingress table 0: Admission control framework. */
4802 struct ovn_datapath *od;
4803 HMAP_FOR_EACH (od, key_node, datapaths) {
4804 if (!od->nbr) {
4805 continue;
4806 }
4807
4808 /* Logical VLANs not supported.
4809 * Broadcast/multicast source address is invalid. */
4810 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
4811 "vlan.present || eth.src[40]", "drop;");
4812 }
4813
4814 /* Logical router ingress table 0: match (priority 50). */
4815 struct ovn_port *op;
4816 HMAP_FOR_EACH (op, key_node, ports) {
4817 if (!op->nbrp) {
4818 continue;
4819 }
4820
4821 if (!lrport_is_enabled(op->nbrp)) {
4822 /* Drop packets from disabled logical ports (since logical flow
4823 * tables are default-drop). */
4824 continue;
4825 }
4826
4827 if (op->derived) {
4828 /* No ingress packets should be received on a chassisredirect
4829 * port. */
4830 continue;
4831 }
4832
4833 ds_clear(&match);
4834 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
4835 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4836 ds_cstr(&match), "next;");
4837
4838 ds_clear(&match);
4839 ds_put_format(&match, "eth.dst == %s && inport == %s",
4840 op->lrp_networks.ea_s, op->json_key);
4841 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4842 && op->od->l3redirect_port) {
4843 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
4844 * should only be received on the "redirect-chassis". */
4845 ds_put_format(&match, " && is_chassis_resident(%s)",
4846 op->od->l3redirect_port->json_key);
4847 }
4848 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4849 ds_cstr(&match), "next;");
4850 }
4851
4852 /* Logical router ingress table 1: IP Input. */
4853 HMAP_FOR_EACH (od, key_node, datapaths) {
4854 if (!od->nbr) {
4855 continue;
4856 }
4857
4858 /* L3 admission control: drop multicast and broadcast source, localhost
4859 * source or destination, and zero network source or destination
4860 * (priority 100). */
4861 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
4862 "ip4.mcast || "
4863 "ip4.src == 255.255.255.255 || "
4864 "ip4.src == 127.0.0.0/8 || "
4865 "ip4.dst == 127.0.0.0/8 || "
4866 "ip4.src == 0.0.0.0/8 || "
4867 "ip4.dst == 0.0.0.0/8",
4868 "drop;");
4869
4870 /* ARP reply handling. Use ARP replies to populate the logical
4871 * router's ARP table. */
4872 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
4873 "put_arp(inport, arp.spa, arp.sha);");
4874
4875 /* Drop Ethernet local broadcast. By definition this traffic should
4876 * not be forwarded.*/
4877 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
4878 "eth.bcast", "drop;");
4879
4880 /* TTL discard */
4881 ds_clear(&match);
4882 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
4883 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
4884 ds_cstr(&match), "drop;");
4885
4886 /* ND advertisement handling. Use advertisements to populate
4887 * the logical router's ARP/ND table. */
4888 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
4889 "put_nd(inport, nd.target, nd.tll);");
4890
4891 /* Lean from neighbor solicitations that were not directed at
4892 * us. (A priority-90 flow will respond to requests to us and
4893 * learn the sender's mac address. */
4894 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
4895 "put_nd(inport, ip6.src, nd.sll);");
4896
4897 /* Pass other traffic not already handled to the next table for
4898 * routing. */
4899 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
4900 }
4901
4902 /* Logical router ingress table 1: IP Input for IPv4. */
4903 HMAP_FOR_EACH (op, key_node, ports) {
4904 if (!op->nbrp) {
4905 continue;
4906 }
4907
4908 if (op->derived) {
4909 /* No ingress packets are accepted on a chassisredirect
4910 * port, so no need to program flows for that port. */
4911 continue;
4912 }
4913
4914 if (op->lrp_networks.n_ipv4_addrs) {
4915 /* L3 admission control: drop packets that originate from an
4916 * IPv4 address owned by the router or a broadcast address
4917 * known to the router (priority 100). */
4918 ds_clear(&match);
4919 ds_put_cstr(&match, "ip4.src == ");
4920 op_put_v4_networks(&match, op, true);
4921 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
4922 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4923 ds_cstr(&match), "drop;");
4924
4925 /* ICMP echo reply. These flows reply to ICMP echo requests
4926 * received for the router's IP address. Since packets only
4927 * get here as part of the logical router datapath, the inport
4928 * (i.e. the incoming locally attached net) does not matter.
4929 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
4930 ds_clear(&match);
4931 ds_put_cstr(&match, "ip4.dst == ");
4932 op_put_v4_networks(&match, op, false);
4933 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
4934
4935 ds_clear(&actions);
4936 ds_put_format(&actions,
4937 "ip4.dst <-> ip4.src; "
4938 "ip.ttl = 255; "
4939 "icmp4.type = 0; "
4940 "flags.loopback = 1; "
4941 "next; ");
4942 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4943 ds_cstr(&match), ds_cstr(&actions));
4944 }
4945
4946 /* ICMP time exceeded */
4947 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4948 ds_clear(&match);
4949 ds_clear(&actions);
4950
4951 ds_put_format(&match,
4952 "inport == %s && ip4 && "
4953 "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
4954 ds_put_format(&actions,
4955 "icmp4 {"
4956 "eth.dst <-> eth.src; "
4957 "icmp4.type = 11; /* Time exceeded */ "
4958 "icmp4.code = 0; /* TTL exceeded in transit */ "
4959 "ip4.dst = ip4.src; "
4960 "ip4.src = %s; "
4961 "ip.ttl = 255; "
4962 "next; };",
4963 op->lrp_networks.ipv4_addrs[i].addr_s);
4964 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
4965 ds_cstr(&match), ds_cstr(&actions));
4966 }
4967
4968 /* ARP reply. These flows reply to ARP requests for the router's own
4969 * IP address. */
4970 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4971 ds_clear(&match);
4972 ds_put_format(&match,
4973 "inport == %s && arp.tpa == %s && arp.op == 1",
4974 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
4975 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4976 && op->od->l3redirect_port) {
4977 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4978 * should only be sent from the "redirect-chassis", so that
4979 * upstream MAC learning points to the "redirect-chassis".
4980 * Also need to avoid generation of multiple ARP responses
4981 * from different chassis. */
4982 ds_put_format(&match, " && is_chassis_resident(%s)",
4983 op->od->l3redirect_port->json_key);
4984 }
4985
4986 ds_clear(&actions);
4987 ds_put_format(&actions,
4988 "eth.dst = eth.src; "
4989 "eth.src = %s; "
4990 "arp.op = 2; /* ARP reply */ "
4991 "arp.tha = arp.sha; "
4992 "arp.sha = %s; "
4993 "arp.tpa = arp.spa; "
4994 "arp.spa = %s; "
4995 "outport = %s; "
4996 "flags.loopback = 1; "
4997 "output;",
4998 op->lrp_networks.ea_s,
4999 op->lrp_networks.ea_s,
5000 op->lrp_networks.ipv4_addrs[i].addr_s,
5001 op->json_key);
5002 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5003 ds_cstr(&match), ds_cstr(&actions));
5004 }
5005
5006 /* A set to hold all load-balancer vips that need ARP responses. */
5007 struct sset all_ips = SSET_INITIALIZER(&all_ips);
5008 int addr_family;
5009 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
5010
5011 const char *ip_address;
5012 SSET_FOR_EACH(ip_address, &all_ips) {
5013 ds_clear(&match);
5014 if (addr_family == AF_INET) {
5015 ds_put_format(&match,
5016 "inport == %s && arp.tpa == %s && arp.op == 1",
5017 op->json_key, ip_address);
5018 } else {
5019 ds_put_format(&match,
5020 "inport == %s && nd_ns && nd.target == %s",
5021 op->json_key, ip_address);
5022 }
5023
5024 ds_clear(&actions);
5025 if (addr_family == AF_INET) {
5026 ds_put_format(&actions,
5027 "eth.dst = eth.src; "
5028 "eth.src = %s; "
5029 "arp.op = 2; /* ARP reply */ "
5030 "arp.tha = arp.sha; "
5031 "arp.sha = %s; "
5032 "arp.tpa = arp.spa; "
5033 "arp.spa = %s; "
5034 "outport = %s; "
5035 "flags.loopback = 1; "
5036 "output;",
5037 op->lrp_networks.ea_s,
5038 op->lrp_networks.ea_s,
5039 ip_address,
5040 op->json_key);
5041 } else {
5042 ds_put_format(&actions,
5043 "nd_na { "
5044 "eth.src = %s; "
5045 "ip6.src = %s; "
5046 "nd.target = %s; "
5047 "nd.tll = %s; "
5048 "outport = inport; "
5049 "flags.loopback = 1; "
5050 "output; "
5051 "};",
5052 op->lrp_networks.ea_s,
5053 ip_address,
5054 ip_address,
5055 op->lrp_networks.ea_s);
5056 }
5057 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5058 ds_cstr(&match), ds_cstr(&actions));
5059 }
5060
5061 sset_destroy(&all_ips);
5062
5063 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
5064 * LBed traffic respectively to be SNATed. In addition, there can be
5065 * a number of SNAT rules in the NAT table. */
5066 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
5067 (op->od->nbr->n_nat + 2));
5068 size_t n_snat_ips = 0;
5069
5070 ovs_be32 snat_ip;
5071 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
5072 &snat_ip);
5073 if (dnat_force_snat_ip) {
5074 snat_ips[n_snat_ips++] = snat_ip;
5075 }
5076
5077 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
5078 &snat_ip);
5079 if (lb_force_snat_ip) {
5080 snat_ips[n_snat_ips++] = snat_ip;
5081 }
5082
5083 for (int i = 0; i < op->od->nbr->n_nat; i++) {
5084 const struct nbrec_nat *nat;
5085
5086 nat = op->od->nbr->nat[i];
5087
5088 ovs_be32 ip;
5089 if (!ip_parse(nat->external_ip, &ip) || !ip) {
5090 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5091 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
5092 "for router %s", nat->external_ip, op->key);
5093 continue;
5094 }
5095
5096 if (!strcmp(nat->type, "snat")) {
5097 snat_ips[n_snat_ips++] = ip;
5098 continue;
5099 }
5100
5101 /* ARP handling for external IP addresses.
5102 *
5103 * DNAT IP addresses are external IP addresses that need ARP
5104 * handling. */
5105 ds_clear(&match);
5106 ds_put_format(&match,
5107 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
5108 op->json_key, IP_ARGS(ip));
5109
5110 ds_clear(&actions);
5111 ds_put_format(&actions,
5112 "eth.dst = eth.src; "
5113 "arp.op = 2; /* ARP reply */ "
5114 "arp.tha = arp.sha; ");
5115
5116 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
5117 struct eth_addr mac;
5118 if (nat->external_mac &&
5119 eth_addr_from_string(nat->external_mac, &mac)
5120 && nat->logical_port) {
5121 /* distributed NAT case, use nat->external_mac */
5122 ds_put_format(&actions,
5123 "eth.src = "ETH_ADDR_FMT"; "
5124 "arp.sha = "ETH_ADDR_FMT"; ",
5125 ETH_ADDR_ARGS(mac),
5126 ETH_ADDR_ARGS(mac));
5127 /* Traffic with eth.src = nat->external_mac should only be
5128 * sent from the chassis where nat->logical_port is
5129 * resident, so that upstream MAC learning points to the
5130 * correct chassis. Also need to avoid generation of
5131 * multiple ARP responses from different chassis. */
5132 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
5133 nat->logical_port);
5134 } else {
5135 ds_put_format(&actions,
5136 "eth.src = %s; "
5137 "arp.sha = %s; ",
5138 op->lrp_networks.ea_s,
5139 op->lrp_networks.ea_s);
5140 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5141 * should only be sent from the "redirect-chassis", so that
5142 * upstream MAC learning points to the "redirect-chassis".
5143 * Also need to avoid generation of multiple ARP responses
5144 * from different chassis. */
5145 if (op->od->l3redirect_port) {
5146 ds_put_format(&match, " && is_chassis_resident(%s)",
5147 op->od->l3redirect_port->json_key);
5148 }
5149 }
5150 } else {
5151 ds_put_format(&actions,
5152 "eth.src = %s; "
5153 "arp.sha = %s; ",
5154 op->lrp_networks.ea_s,
5155 op->lrp_networks.ea_s);
5156 }
5157 ds_put_format(&actions,
5158 "arp.tpa = arp.spa; "
5159 "arp.spa = "IP_FMT"; "
5160 "outport = %s; "
5161 "flags.loopback = 1; "
5162 "output;",
5163 IP_ARGS(ip),
5164 op->json_key);
5165 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5166 ds_cstr(&match), ds_cstr(&actions));
5167 }
5168
5169 if (!smap_get(&op->od->nbr->options, "chassis")
5170 && !op->od->l3dgw_port) {
5171 /* UDP/TCP port unreachable. */
5172 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5173 ds_clear(&match);
5174 ds_put_format(&match,
5175 "ip4 && ip4.dst == %s && !ip.later_frag && udp",
5176 op->lrp_networks.ipv4_addrs[i].addr_s);
5177 const char *action = "icmp4 {"
5178 "eth.dst <-> eth.src; "
5179 "ip4.dst <-> ip4.src; "
5180 "ip.ttl = 255; "
5181 "icmp4.type = 3; "
5182 "icmp4.code = 3; "
5183 "next; };";
5184 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5185 ds_cstr(&match), action);
5186
5187 ds_clear(&match);
5188 ds_put_format(&match,
5189 "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
5190 op->lrp_networks.ipv4_addrs[i].addr_s);
5191 action = "tcp_reset {"
5192 "eth.dst <-> eth.src; "
5193 "ip4.dst <-> ip4.src; "
5194 "next; };";
5195 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5196 ds_cstr(&match), action);
5197
5198 ds_clear(&match);
5199 ds_put_format(&match,
5200 "ip4 && ip4.dst == %s && !ip.later_frag",
5201 op->lrp_networks.ipv4_addrs[i].addr_s);
5202 action = "icmp4 {"
5203 "eth.dst <-> eth.src; "
5204 "ip4.dst <-> ip4.src; "
5205 "ip.ttl = 255; "
5206 "icmp4.type = 3; "
5207 "icmp4.code = 2; "
5208 "next; };";
5209 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5210 ds_cstr(&match), action);
5211 }
5212 }
5213
5214 ds_clear(&match);
5215 ds_put_cstr(&match, "ip4.dst == {");
5216 bool has_drop_ips = false;
5217 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5218 bool snat_ip_is_router_ip = false;
5219 for (int j = 0; j < n_snat_ips; j++) {
5220 /* Packets to SNAT IPs should not be dropped. */
5221 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
5222 snat_ip_is_router_ip = true;
5223 break;
5224 }
5225 }
5226 if (snat_ip_is_router_ip) {
5227 continue;
5228 }
5229 ds_put_format(&match, "%s, ",
5230 op->lrp_networks.ipv4_addrs[i].addr_s);
5231 has_drop_ips = true;
5232 }
5233 ds_chomp(&match, ' ');
5234 ds_chomp(&match, ',');
5235 ds_put_cstr(&match, "}");
5236
5237 if (has_drop_ips) {
5238 /* Drop IP traffic to this router. */
5239 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5240 ds_cstr(&match), "drop;");
5241 }
5242
5243 free(snat_ips);
5244 }
5245
5246 /* Logical router ingress table 1: IP Input for IPv6. */
5247 HMAP_FOR_EACH (op, key_node, ports) {
5248 if (!op->nbrp) {
5249 continue;
5250 }
5251
5252 if (op->derived) {
5253 /* No ingress packets are accepted on a chassisredirect
5254 * port, so no need to program flows for that port. */
5255 continue;
5256 }
5257
5258 if (op->lrp_networks.n_ipv6_addrs) {
5259 /* L3 admission control: drop packets that originate from an
5260 * IPv6 address owned by the router (priority 100). */
5261 ds_clear(&match);
5262 ds_put_cstr(&match, "ip6.src == ");
5263 op_put_v6_networks(&match, op);
5264 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5265 ds_cstr(&match), "drop;");
5266
5267 /* ICMPv6 echo reply. These flows reply to echo requests
5268 * received for the router's IP address. */
5269 ds_clear(&match);
5270 ds_put_cstr(&match, "ip6.dst == ");
5271 op_put_v6_networks(&match, op);
5272 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
5273
5274 ds_clear(&actions);
5275 ds_put_cstr(&actions,
5276 "ip6.dst <-> ip6.src; "
5277 "ip.ttl = 255; "
5278 "icmp6.type = 129; "
5279 "flags.loopback = 1; "
5280 "next; ");
5281 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5282 ds_cstr(&match), ds_cstr(&actions));
5283
5284 /* Drop IPv6 traffic to this router. */
5285 ds_clear(&match);
5286 ds_put_cstr(&match, "ip6.dst == ");
5287 op_put_v6_networks(&match, op);
5288 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5289 ds_cstr(&match), "drop;");
5290 }
5291
5292 /* ND reply. These flows reply to ND solicitations for the
5293 * router's own IP address. */
5294 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5295 ds_clear(&match);
5296 ds_put_format(&match,
5297 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
5298 "&& nd.target == %s",
5299 op->json_key,
5300 op->lrp_networks.ipv6_addrs[i].addr_s,
5301 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
5302 op->lrp_networks.ipv6_addrs[i].addr_s);
5303 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5304 && op->od->l3redirect_port) {
5305 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5306 * should only be sent from the "redirect-chassis", so that
5307 * upstream MAC learning points to the "redirect-chassis".
5308 * Also need to avoid generation of multiple ND replies
5309 * from different chassis. */
5310 ds_put_format(&match, " && is_chassis_resident(%s)",
5311 op->od->l3redirect_port->json_key);
5312 }
5313
5314 ds_clear(&actions);
5315 ds_put_format(&actions,
5316 "put_nd(inport, ip6.src, nd.sll); "
5317 "nd_na_router { "
5318 "eth.src = %s; "
5319 "ip6.src = %s; "
5320 "nd.target = %s; "
5321 "nd.tll = %s; "
5322 "outport = inport; "
5323 "flags.loopback = 1; "
5324 "output; "
5325 "};",
5326 op->lrp_networks.ea_s,
5327 op->lrp_networks.ipv6_addrs[i].addr_s,
5328 op->lrp_networks.ipv6_addrs[i].addr_s,
5329 op->lrp_networks.ea_s);
5330 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5331 ds_cstr(&match), ds_cstr(&actions));
5332 }
5333
5334 /* UDP/TCP port unreachable */
5335 if (!smap_get(&op->od->nbr->options, "chassis")
5336 && !op->od->l3dgw_port) {
5337 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5338 ds_clear(&match);
5339 ds_put_format(&match,
5340 "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
5341 op->lrp_networks.ipv6_addrs[i].addr_s);
5342 const char *action = "tcp_reset {"
5343 "eth.dst <-> eth.src; "
5344 "ip6.dst <-> ip6.src; "
5345 "next; };";
5346 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5347 ds_cstr(&match), action);
5348
5349 ds_clear(&match);
5350 ds_put_format(&match,
5351 "ip6 && ip6.dst == %s && !ip.later_frag && udp",
5352 op->lrp_networks.ipv6_addrs[i].addr_s);
5353 action = "icmp6 {"
5354 "eth.dst <-> eth.src; "
5355 "ip6.dst <-> ip6.src; "
5356 "ip.ttl = 255; "
5357 "icmp6.type = 1; "
5358 "icmp6.code = 4; "
5359 "next; };";
5360 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5361 ds_cstr(&match), action);
5362
5363 ds_clear(&match);
5364 ds_put_format(&match,
5365 "ip6 && ip6.dst == %s && !ip.later_frag",
5366 op->lrp_networks.ipv6_addrs[i].addr_s);
5367 action = "icmp6 {"
5368 "eth.dst <-> eth.src; "
5369 "ip6.dst <-> ip6.src; "
5370 "ip.ttl = 255; "
5371 "icmp6.type = 1; "
5372 "icmp6.code = 3; "
5373 "next; };";
5374 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5375 ds_cstr(&match), action);
5376 }
5377 }
5378
5379 /* ICMPv6 time exceeded */
5380 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5381 /* skip link-local address */
5382 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5383 continue;
5384 }
5385
5386 ds_clear(&match);
5387 ds_clear(&actions);
5388
5389 ds_put_format(&match,
5390 "inport == %s && ip6 && "
5391 "ip6.src == %s/%d && "
5392 "ip.ttl == {0, 1} && !ip.later_frag",
5393 op->json_key,
5394 op->lrp_networks.ipv6_addrs[i].network_s,
5395 op->lrp_networks.ipv6_addrs[i].plen);
5396 ds_put_format(&actions,
5397 "icmp6 {"
5398 "eth.dst <-> eth.src; "
5399 "ip6.dst = ip6.src; "
5400 "ip6.src = %s; "
5401 "ip.ttl = 255; "
5402 "icmp6.type = 3; /* Time exceeded */ "
5403 "icmp6.code = 0; /* TTL exceeded in transit */ "
5404 "next; };",
5405 op->lrp_networks.ipv6_addrs[i].addr_s);
5406 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5407 ds_cstr(&match), ds_cstr(&actions));
5408 }
5409 }
5410
5411 /* NAT, Defrag and load balancing. */
5412 HMAP_FOR_EACH (od, key_node, datapaths) {
5413 if (!od->nbr) {
5414 continue;
5415 }
5416
5417 /* Packets are allowed by default. */
5418 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
5419 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
5420 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
5421 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
5422 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
5423 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
5424
5425 /* NAT rules are only valid on Gateway routers and routers with
5426 * l3dgw_port (router has a port with "redirect-chassis"
5427 * specified). */
5428 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
5429 continue;
5430 }
5431
5432 ovs_be32 snat_ip;
5433 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
5434 &snat_ip);
5435 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
5436 &snat_ip);
5437
5438 for (int i = 0; i < od->nbr->n_nat; i++) {
5439 const struct nbrec_nat *nat;
5440
5441 nat = od->nbr->nat[i];
5442
5443 ovs_be32 ip, mask;
5444
5445 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
5446 if (error || mask != OVS_BE32_MAX) {
5447 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5448 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
5449 nat->external_ip);
5450 free(error);
5451 continue;
5452 }
5453
5454 /* Check the validity of nat->logical_ip. 'logical_ip' can
5455 * be a subnet when the type is "snat". */
5456 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
5457 if (!strcmp(nat->type, "snat")) {
5458 if (error) {
5459 static struct vlog_rate_limit rl =
5460 VLOG_RATE_LIMIT_INIT(5, 1);
5461 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
5462 "in router "UUID_FMT"",
5463 nat->logical_ip, UUID_ARGS(&od->key));
5464 free(error);
5465 continue;
5466 }
5467 } else {
5468 if (error || mask != OVS_BE32_MAX) {
5469 static struct vlog_rate_limit rl =
5470 VLOG_RATE_LIMIT_INIT(5, 1);
5471 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
5472 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
5473 free(error);
5474 continue;
5475 }
5476 }
5477
5478 /* For distributed router NAT, determine whether this NAT rule
5479 * satisfies the conditions for distributed NAT processing. */
5480 bool distributed = false;
5481 struct eth_addr mac;
5482 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
5483 nat->logical_port && nat->external_mac) {
5484 if (eth_addr_from_string(nat->external_mac, &mac)) {
5485 distributed = true;
5486 } else {
5487 static struct vlog_rate_limit rl =
5488 VLOG_RATE_LIMIT_INIT(5, 1);
5489 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
5490 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
5491 continue;
5492 }
5493 }
5494
5495 /* Ingress UNSNAT table: It is for already established connections'
5496 * reverse traffic. i.e., SNAT has already been done in egress
5497 * pipeline and now the packet has entered the ingress pipeline as
5498 * part of a reply. We undo the SNAT here.
5499 *
5500 * Undoing SNAT has to happen before DNAT processing. This is
5501 * because when the packet was DNATed in ingress pipeline, it did
5502 * not know about the possibility of eventual additional SNAT in
5503 * egress pipeline. */
5504 if (!strcmp(nat->type, "snat")
5505 || !strcmp(nat->type, "dnat_and_snat")) {
5506 if (!od->l3dgw_port) {
5507 /* Gateway router. */
5508 ds_clear(&match);
5509 ds_put_format(&match, "ip && ip4.dst == %s",
5510 nat->external_ip);
5511 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
5512 ds_cstr(&match), "ct_snat;");
5513 } else {
5514 /* Distributed router. */
5515
5516 /* Traffic received on l3dgw_port is subject to NAT. */
5517 ds_clear(&match);
5518 ds_put_format(&match, "ip && ip4.dst == %s"
5519 " && inport == %s",
5520 nat->external_ip,
5521 od->l3dgw_port->json_key);
5522 if (!distributed && od->l3redirect_port) {
5523 /* Flows for NAT rules that are centralized are only
5524 * programmed on the "redirect-chassis". */
5525 ds_put_format(&match, " && is_chassis_resident(%s)",
5526 od->l3redirect_port->json_key);
5527 }
5528 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5529 ds_cstr(&match), "ct_snat;");
5530
5531 /* Traffic received on other router ports must be
5532 * redirected to the central instance of the l3dgw_port
5533 * for NAT processing. */
5534 ds_clear(&match);
5535 ds_put_format(&match, "ip && ip4.dst == %s",
5536 nat->external_ip);
5537 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
5538 ds_cstr(&match),
5539 REGBIT_NAT_REDIRECT" = 1; next;");
5540 }
5541 }
5542
5543 /* Ingress DNAT table: Packets enter the pipeline with destination
5544 * IP address that needs to be DNATted from a external IP address
5545 * to a logical IP address. */
5546 if (!strcmp(nat->type, "dnat")
5547 || !strcmp(nat->type, "dnat_and_snat")) {
5548 if (!od->l3dgw_port) {
5549 /* Gateway router. */
5550 /* Packet when it goes from the initiator to destination.
5551 * We need to set flags.loopback because the router can
5552 * send the packet back through the same interface. */
5553 ds_clear(&match);
5554 ds_put_format(&match, "ip && ip4.dst == %s",
5555 nat->external_ip);
5556 ds_clear(&actions);
5557 if (dnat_force_snat_ip) {
5558 /* Indicate to the future tables that a DNAT has taken
5559 * place and a force SNAT needs to be done in the
5560 * Egress SNAT table. */
5561 ds_put_format(&actions,
5562 "flags.force_snat_for_dnat = 1; ");
5563 }
5564 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
5565 nat->logical_ip);
5566 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5567 ds_cstr(&match), ds_cstr(&actions));
5568 } else {
5569 /* Distributed router. */
5570
5571 /* Traffic received on l3dgw_port is subject to NAT. */
5572 ds_clear(&match);
5573 ds_put_format(&match, "ip && ip4.dst == %s"
5574 " && inport == %s",
5575 nat->external_ip,
5576 od->l3dgw_port->json_key);
5577 if (!distributed && od->l3redirect_port) {
5578 /* Flows for NAT rules that are centralized are only
5579 * programmed on the "redirect-chassis". */
5580 ds_put_format(&match, " && is_chassis_resident(%s)",
5581 od->l3redirect_port->json_key);
5582 }
5583 ds_clear(&actions);
5584 ds_put_format(&actions, "ct_dnat(%s);",
5585 nat->logical_ip);
5586 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5587 ds_cstr(&match), ds_cstr(&actions));
5588
5589 /* Traffic received on other router ports must be
5590 * redirected to the central instance of the l3dgw_port
5591 * for NAT processing. */
5592 ds_clear(&match);
5593 ds_put_format(&match, "ip && ip4.dst == %s",
5594 nat->external_ip);
5595 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5596 ds_cstr(&match),
5597 REGBIT_NAT_REDIRECT" = 1; next;");
5598 }
5599 }
5600
5601 /* Egress UNDNAT table: It is for already established connections'
5602 * reverse traffic. i.e., DNAT has already been done in ingress
5603 * pipeline and now the packet has entered the egress pipeline as
5604 * part of a reply. We undo the DNAT here.
5605 *
5606 * Note that this only applies for NAT on a distributed router.
5607 * Undo DNAT on a gateway router is done in the ingress DNAT
5608 * pipeline stage. */
5609 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
5610 || !strcmp(nat->type, "dnat_and_snat"))) {
5611 ds_clear(&match);
5612 ds_put_format(&match, "ip && ip4.src == %s"
5613 " && outport == %s",
5614 nat->logical_ip,
5615 od->l3dgw_port->json_key);
5616 if (!distributed && od->l3redirect_port) {
5617 /* Flows for NAT rules that are centralized are only
5618 * programmed on the "redirect-chassis". */
5619 ds_put_format(&match, " && is_chassis_resident(%s)",
5620 od->l3redirect_port->json_key);
5621 }
5622 ds_clear(&actions);
5623 if (distributed) {
5624 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5625 ETH_ADDR_ARGS(mac));
5626 }
5627 ds_put_format(&actions, "ct_dnat;");
5628 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
5629 ds_cstr(&match), ds_cstr(&actions));
5630 }
5631
5632 /* Egress SNAT table: Packets enter the egress pipeline with
5633 * source ip address that needs to be SNATted to a external ip
5634 * address. */
5635 if (!strcmp(nat->type, "snat")
5636 || !strcmp(nat->type, "dnat_and_snat")) {
5637 if (!od->l3dgw_port) {
5638 /* Gateway router. */
5639 ds_clear(&match);
5640 ds_put_format(&match, "ip && ip4.src == %s",
5641 nat->logical_ip);
5642 ds_clear(&actions);
5643 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5644
5645 /* The priority here is calculated such that the
5646 * nat->logical_ip with the longest mask gets a higher
5647 * priority. */
5648 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5649 count_1bits(ntohl(mask)) + 1,
5650 ds_cstr(&match), ds_cstr(&actions));
5651 } else {
5652 /* Distributed router. */
5653 ds_clear(&match);
5654 ds_put_format(&match, "ip && ip4.src == %s"
5655 " && outport == %s",
5656 nat->logical_ip,
5657 od->l3dgw_port->json_key);
5658 if (!distributed && od->l3redirect_port) {
5659 /* Flows for NAT rules that are centralized are only
5660 * programmed on the "redirect-chassis". */
5661 ds_put_format(&match, " && is_chassis_resident(%s)",
5662 od->l3redirect_port->json_key);
5663 }
5664 ds_clear(&actions);
5665 if (distributed) {
5666 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5667 ETH_ADDR_ARGS(mac));
5668 }
5669 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5670
5671 /* The priority here is calculated such that the
5672 * nat->logical_ip with the longest mask gets a higher
5673 * priority. */
5674 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5675 count_1bits(ntohl(mask)) + 1,
5676 ds_cstr(&match), ds_cstr(&actions));
5677 }
5678 }
5679
5680 /* Logical router ingress table 0:
5681 * For NAT on a distributed router, add rules allowing
5682 * ingress traffic with eth.dst matching nat->external_mac
5683 * on the l3dgw_port instance where nat->logical_port is
5684 * resident. */
5685 if (distributed) {
5686 ds_clear(&match);
5687 ds_put_format(&match,
5688 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
5689 " && is_chassis_resident(\"%s\")",
5690 ETH_ADDR_ARGS(mac),
5691 od->l3dgw_port->json_key,
5692 nat->logical_port);
5693 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
5694 ds_cstr(&match), "next;");
5695 }
5696
5697 /* Ingress Gateway Redirect Table: For NAT on a distributed
5698 * router, add flows that are specific to a NAT rule. These
5699 * flows indicate the presence of an applicable NAT rule that
5700 * can be applied in a distributed manner. */
5701 if (distributed) {
5702 ds_clear(&match);
5703 ds_put_format(&match, "ip4.src == %s && outport == %s",
5704 nat->logical_ip,
5705 od->l3dgw_port->json_key);
5706 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
5707 ds_cstr(&match), "next;");
5708 }
5709
5710 /* Egress Loopback table: For NAT on a distributed router.
5711 * If packets in the egress pipeline on the distributed
5712 * gateway port have ip.dst matching a NAT external IP, then
5713 * loop a clone of the packet back to the beginning of the
5714 * ingress pipeline with inport = outport. */
5715 if (od->l3dgw_port) {
5716 /* Distributed router. */
5717 ds_clear(&match);
5718 ds_put_format(&match, "ip4.dst == %s && outport == %s",
5719 nat->external_ip,
5720 od->l3dgw_port->json_key);
5721 ds_clear(&actions);
5722 ds_put_format(&actions,
5723 "clone { ct_clear; "
5724 "inport = outport; outport = \"\"; "
5725 "flags = 0; flags.loopback = 1; ");
5726 for (int j = 0; j < MFF_N_LOG_REGS; j++) {
5727 ds_put_format(&actions, "reg%d = 0; ", j);
5728 }
5729 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
5730 "next(pipeline=ingress, table=0); };");
5731 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
5732 ds_cstr(&match), ds_cstr(&actions));
5733 }
5734 }
5735
5736 /* Handle force SNAT options set in the gateway router. */
5737 if (dnat_force_snat_ip && !od->l3dgw_port) {
5738 /* If a packet with destination IP address as that of the
5739 * gateway router (as set in options:dnat_force_snat_ip) is seen,
5740 * UNSNAT it. */
5741 ds_clear(&match);
5742 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
5743 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
5744 ds_cstr(&match), "ct_snat;");
5745
5746 /* Higher priority rules to force SNAT with the IP addresses
5747 * configured in the Gateway router. This only takes effect
5748 * when the packet has already been DNATed once. */
5749 ds_clear(&match);
5750 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
5751 ds_clear(&actions);
5752 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
5753 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
5754 ds_cstr(&match), ds_cstr(&actions));
5755 }
5756 if (lb_force_snat_ip && !od->l3dgw_port) {
5757 /* If a packet with destination IP address as that of the
5758 * gateway router (as set in options:lb_force_snat_ip) is seen,
5759 * UNSNAT it. */
5760 ds_clear(&match);
5761 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
5762 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5763 ds_cstr(&match), "ct_snat;");
5764
5765 /* Load balanced traffic will have flags.force_snat_for_lb set.
5766 * Force SNAT it. */
5767 ds_clear(&match);
5768 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
5769 ds_clear(&actions);
5770 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
5771 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
5772 ds_cstr(&match), ds_cstr(&actions));
5773 }
5774
5775 if (!od->l3dgw_port) {
5776 /* For gateway router, re-circulate every packet through
5777 * the DNAT zone. This helps with the following.
5778 *
5779 * Any packet that needs to be unDNATed in the reverse
5780 * direction gets unDNATed. Ideally this could be done in
5781 * the egress pipeline. But since the gateway router
5782 * does not have any feature that depends on the source
5783 * ip address being external IP address for IP routing,
5784 * we can do it here, saving a future re-circulation. */
5785 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5786 "ip", "flags.loopback = 1; ct_dnat;");
5787 } else {
5788 /* For NAT on a distributed router, add flows to Ingress
5789 * IP Routing table, Ingress ARP Resolution table, and
5790 * Ingress Gateway Redirect Table that are not specific to a
5791 * NAT rule. */
5792
5793 /* The highest priority IN_IP_ROUTING rule matches packets
5794 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5795 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
5796 * will take care of setting the outport. */
5797 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
5798 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
5799
5800 /* The highest priority IN_ARP_RESOLVE rule matches packets
5801 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5802 * then sets eth.dst to the distributed gateway port's
5803 * ethernet address. */
5804 ds_clear(&actions);
5805 ds_put_format(&actions, "eth.dst = %s; next;",
5806 od->l3dgw_port->lrp_networks.ea_s);
5807 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
5808 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
5809
5810 /* The highest priority IN_GW_REDIRECT rule redirects packets
5811 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
5812 * the central instance of the l3dgw_port for NAT processing. */
5813 ds_clear(&actions);
5814 ds_put_format(&actions, "outport = %s; next;",
5815 od->l3redirect_port->json_key);
5816 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
5817 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
5818 }
5819
5820 /* Load balancing and packet defrag are only valid on
5821 * Gateway routers or router with gateway port. */
5822 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
5823 continue;
5824 }
5825
5826 /* A set to hold all ips that need defragmentation and tracking. */
5827 struct sset all_ips = SSET_INITIALIZER(&all_ips);
5828
5829 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
5830 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
5831 struct smap *vips = &lb->vips;
5832 struct smap_node *node;
5833
5834 SMAP_FOR_EACH (node, vips) {
5835 uint16_t port = 0;
5836 int addr_family;
5837
5838 /* node->key contains IP:port or just IP. */
5839 char *ip_address = NULL;
5840 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
5841 &addr_family);
5842 if (!ip_address) {
5843 continue;
5844 }
5845
5846 if (!sset_contains(&all_ips, ip_address)) {
5847 sset_add(&all_ips, ip_address);
5848 /* If there are any load balancing rules, we should send
5849 * the packet to conntrack for defragmentation and
5850 * tracking. This helps with two things.
5851 *
5852 * 1. With tracking, we can send only new connections to
5853 * pick a DNAT ip address from a group.
5854 * 2. If there are L4 ports in load balancing rules, we
5855 * need the defragmentation to match on L4 ports. */
5856 ds_clear(&match);
5857 if (addr_family == AF_INET) {
5858 ds_put_format(&match, "ip && ip4.dst == %s",
5859 ip_address);
5860 } else {
5861 ds_put_format(&match, "ip && ip6.dst == %s",
5862 ip_address);
5863 }
5864 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
5865 100, ds_cstr(&match), "ct_next;");
5866 }
5867
5868 /* Higher priority rules are added for load-balancing in DNAT
5869 * table. For every match (on a VIP[:port]), we add two flows
5870 * via add_router_lb_flow(). One flow is for specific matching
5871 * on ct.new with an action of "ct_lb($targets);". The other
5872 * flow is for ct.est with an action of "ct_dnat;". */
5873 ds_clear(&actions);
5874 ds_put_format(&actions, "ct_lb(%s);", node->value);
5875
5876 ds_clear(&match);
5877 if (addr_family == AF_INET) {
5878 ds_put_format(&match, "ip && ip4.dst == %s",
5879 ip_address);
5880 } else {
5881 ds_put_format(&match, "ip && ip6.dst == %s",
5882 ip_address);
5883 }
5884 free(ip_address);
5885
5886 int prio = 110;
5887 bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ?
5888 true : false;
5889 if (port) {
5890 if (is_udp) {
5891 ds_put_format(&match, " && udp && udp.dst == %d",
5892 port);
5893 } else {
5894 ds_put_format(&match, " && tcp && tcp.dst == %d",
5895 port);
5896 }
5897 prio = 120;
5898 }
5899
5900 if (od->l3redirect_port) {
5901 ds_put_format(&match, " && is_chassis_resident(%s)",
5902 od->l3redirect_port->json_key);
5903 }
5904 add_router_lb_flow(lflows, od, &match, &actions, prio,
5905 lb_force_snat_ip, node->value, is_udp,
5906 addr_family);
5907 }
5908 }
5909 sset_destroy(&all_ips);
5910 }
5911
5912 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
5913 * response. */
5914 HMAP_FOR_EACH (op, key_node, ports) {
5915 if (!op->nbrp || op->nbrp->peer || !op->peer) {
5916 continue;
5917 }
5918
5919 if (!op->lrp_networks.n_ipv6_addrs) {
5920 continue;
5921 }
5922
5923 const char *address_mode = smap_get(
5924 &op->nbrp->ipv6_ra_configs, "address_mode");
5925
5926 if (!address_mode) {
5927 continue;
5928 }
5929 if (strcmp(address_mode, "slaac") &&
5930 strcmp(address_mode, "dhcpv6_stateful") &&
5931 strcmp(address_mode, "dhcpv6_stateless")) {
5932 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
5933 VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
5934 address_mode);
5935 continue;
5936 }
5937
5938 if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
5939 false)) {
5940 copy_ra_to_sb(op, address_mode);
5941 }
5942
5943 ds_clear(&match);
5944 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
5945 op->json_key);
5946 ds_clear(&actions);
5947
5948 const char *mtu_s = smap_get(
5949 &op->nbrp->ipv6_ra_configs, "mtu");
5950
5951 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
5952 uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
5953
5954 ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
5955 "addr_mode = \"%s\", slla = %s",
5956 address_mode, op->lrp_networks.ea_s);
5957 if (mtu > 0) {
5958 ds_put_format(&actions, ", mtu = %u", mtu);
5959 }
5960
5961 bool add_rs_response_flow = false;
5962
5963 for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5964 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5965 continue;
5966 }
5967
5968 /* Add the prefix option if the address mode is slaac or
5969 * dhcpv6_stateless. */
5970 if (strcmp(address_mode, "dhcpv6_stateful")) {
5971 ds_put_format(&actions, ", prefix = %s/%u",
5972 op->lrp_networks.ipv6_addrs[i].network_s,
5973 op->lrp_networks.ipv6_addrs[i].plen);
5974 }
5975 add_rs_response_flow = true;
5976 }
5977
5978 if (add_rs_response_flow) {
5979 ds_put_cstr(&actions, "); next;");
5980 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50,
5981 ds_cstr(&match), ds_cstr(&actions));
5982 ds_clear(&actions);
5983 ds_clear(&match);
5984 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && "
5985 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
5986
5987 char ip6_str[INET6_ADDRSTRLEN + 1];
5988 struct in6_addr lla;
5989 in6_generate_lla(op->lrp_networks.ea, &lla);
5990 memset(ip6_str, 0, sizeof(ip6_str));
5991 ipv6_string_mapped(ip6_str, &lla);
5992 ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; "
5993 "ip6.dst = ip6.src; ip6.src = %s; "
5994 "outport = inport; flags.loopback = 1; "
5995 "output;",
5996 op->lrp_networks.ea_s, ip6_str);
5997 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50,
5998 ds_cstr(&match), ds_cstr(&actions));
5999 }
6000 }
6001
6002 /* Logical router ingress table 5, 6: RS responder, by default goto next.
6003 * (priority 0)*/
6004 HMAP_FOR_EACH (od, key_node, datapaths) {
6005 if (!od->nbr) {
6006 continue;
6007 }
6008
6009 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
6010 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
6011 }
6012
6013 /* Logical router ingress table 7: IP Routing.
6014 *
6015 * A packet that arrives at this table is an IP packet that should be
6016 * routed to the address in 'ip[46].dst'. This table sets outport to
6017 * the correct output port, eth.src to the output port's MAC
6018 * address, and '[xx]reg0' to the next-hop IP address (leaving
6019 * 'ip[46].dst', the packet’s final destination, unchanged), and
6020 * advances to the next table for ARP/ND resolution. */
6021 HMAP_FOR_EACH (op, key_node, ports) {
6022 if (!op->nbrp) {
6023 continue;
6024 }
6025
6026 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
6027 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
6028 op->lrp_networks.ipv4_addrs[i].network_s,
6029 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
6030 }
6031
6032 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6033 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
6034 op->lrp_networks.ipv6_addrs[i].network_s,
6035 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6036 }
6037 }
6038
6039 /* Convert the static routes to flows. */
6040 HMAP_FOR_EACH (od, key_node, datapaths) {
6041 if (!od->nbr) {
6042 continue;
6043 }
6044
6045 for (int i = 0; i < od->nbr->n_static_routes; i++) {
6046 const struct nbrec_logical_router_static_route *route;
6047
6048 route = od->nbr->static_routes[i];
6049 build_static_route_flow(lflows, od, ports, route);
6050 }
6051 }
6052
6053 /* XXX destination unreachable */
6054
6055 /* Local router ingress table 8: ARP Resolution.
6056 *
6057 * Any packet that reaches this table is an IP packet whose next-hop IP
6058 * address is in reg0. (ip4.dst is the final destination.) This table
6059 * resolves the IP address in reg0 into an output port in outport and an
6060 * Ethernet address in eth.dst. */
6061 HMAP_FOR_EACH (op, key_node, ports) {
6062 if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
6063 continue;
6064 }
6065
6066 if (op->nbrp) {
6067 /* This is a logical router port. If next-hop IP address in
6068 * '[xx]reg0' matches IP address of this router port, then
6069 * the packet is intended to eventually be sent to this
6070 * logical port. Set the destination mac address using this
6071 * port's mac address.
6072 *
6073 * The packet is still in peer's logical pipeline. So the match
6074 * should be on peer's outport. */
6075 if (op->peer && op->nbrp->peer) {
6076 if (op->lrp_networks.n_ipv4_addrs) {
6077 ds_clear(&match);
6078 ds_put_format(&match, "outport == %s && reg0 == ",
6079 op->peer->json_key);
6080 op_put_v4_networks(&match, op, false);
6081
6082 ds_clear(&actions);
6083 ds_put_format(&actions, "eth.dst = %s; next;",
6084 op->lrp_networks.ea_s);
6085 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6086 100, ds_cstr(&match), ds_cstr(&actions));
6087 }
6088
6089 if (op->lrp_networks.n_ipv6_addrs) {
6090 ds_clear(&match);
6091 ds_put_format(&match, "outport == %s && xxreg0 == ",
6092 op->peer->json_key);
6093 op_put_v6_networks(&match, op);
6094
6095 ds_clear(&actions);
6096 ds_put_format(&actions, "eth.dst = %s; next;",
6097 op->lrp_networks.ea_s);
6098 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6099 100, ds_cstr(&match), ds_cstr(&actions));
6100 }
6101 }
6102 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
6103 /* This is a logical switch port that backs a VM or a container.
6104 * Extract its addresses. For each of the address, go through all
6105 * the router ports attached to the switch (to which this port
6106 * connects) and if the address in question is reachable from the
6107 * router port, add an ARP/ND entry in that router's pipeline. */
6108
6109 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
6110 const char *ea_s = op->lsp_addrs[i].ea_s;
6111 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
6112 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
6113 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6114 /* Get the Logical_Router_Port that the
6115 * Logical_Switch_Port is connected to, as
6116 * 'peer'. */
6117 const char *peer_name = smap_get(
6118 &op->od->router_ports[k]->nbsp->options,
6119 "router-port");
6120 if (!peer_name) {
6121 continue;
6122 }
6123
6124 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6125 if (!peer || !peer->nbrp) {
6126 continue;
6127 }
6128
6129 if (!find_lrp_member_ip(peer, ip_s)) {
6130 continue;
6131 }
6132
6133 ds_clear(&match);
6134 ds_put_format(&match, "outport == %s && reg0 == %s",
6135 peer->json_key, ip_s);
6136
6137 ds_clear(&actions);
6138 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6139 ovn_lflow_add(lflows, peer->od,
6140 S_ROUTER_IN_ARP_RESOLVE, 100,
6141 ds_cstr(&match), ds_cstr(&actions));
6142 }
6143 }
6144
6145 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
6146 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
6147 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6148 /* Get the Logical_Router_Port that the
6149 * Logical_Switch_Port is connected to, as
6150 * 'peer'. */
6151 const char *peer_name = smap_get(
6152 &op->od->router_ports[k]->nbsp->options,
6153 "router-port");
6154 if (!peer_name) {
6155 continue;
6156 }
6157
6158 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6159 if (!peer || !peer->nbrp) {
6160 continue;
6161 }
6162
6163 if (!find_lrp_member_ip(peer, ip_s)) {
6164 continue;
6165 }
6166
6167 ds_clear(&match);
6168 ds_put_format(&match, "outport == %s && xxreg0 == %s",
6169 peer->json_key, ip_s);
6170
6171 ds_clear(&actions);
6172 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6173 ovn_lflow_add(lflows, peer->od,
6174 S_ROUTER_IN_ARP_RESOLVE, 100,
6175 ds_cstr(&match), ds_cstr(&actions));
6176 }
6177 }
6178 }
6179 } else if (!strcmp(op->nbsp->type, "router")) {
6180 /* This is a logical switch port that connects to a router. */
6181
6182 /* The peer of this switch port is the router port for which
6183 * we need to add logical flows such that it can resolve
6184 * ARP entries for all the other router ports connected to
6185 * the switch in question. */
6186
6187 const char *peer_name = smap_get(&op->nbsp->options,
6188 "router-port");
6189 if (!peer_name) {
6190 continue;
6191 }
6192
6193 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6194 if (!peer || !peer->nbrp) {
6195 continue;
6196 }
6197
6198 for (size_t i = 0; i < op->od->n_router_ports; i++) {
6199 const char *router_port_name = smap_get(
6200 &op->od->router_ports[i]->nbsp->options,
6201 "router-port");
6202 struct ovn_port *router_port = ovn_port_find(ports,
6203 router_port_name);
6204 if (!router_port || !router_port->nbrp) {
6205 continue;
6206 }
6207
6208 /* Skip the router port under consideration. */
6209 if (router_port == peer) {
6210 continue;
6211 }
6212
6213 if (router_port->lrp_networks.n_ipv4_addrs) {
6214 ds_clear(&match);
6215 ds_put_format(&match, "outport == %s && reg0 == ",
6216 peer->json_key);
6217 op_put_v4_networks(&match, router_port, false);
6218
6219 ds_clear(&actions);
6220 ds_put_format(&actions, "eth.dst = %s; next;",
6221 router_port->lrp_networks.ea_s);
6222 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6223 100, ds_cstr(&match), ds_cstr(&actions));
6224 }
6225
6226 if (router_port->lrp_networks.n_ipv6_addrs) {
6227 ds_clear(&match);
6228 ds_put_format(&match, "outport == %s && xxreg0 == ",
6229 peer->json_key);
6230 op_put_v6_networks(&match, router_port);
6231
6232 ds_clear(&actions);
6233 ds_put_format(&actions, "eth.dst = %s; next;",
6234 router_port->lrp_networks.ea_s);
6235 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6236 100, ds_cstr(&match), ds_cstr(&actions));
6237 }
6238 }
6239 }
6240 }
6241
6242 HMAP_FOR_EACH (od, key_node, datapaths) {
6243 if (!od->nbr) {
6244 continue;
6245 }
6246
6247 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
6248 "get_arp(outport, reg0); next;");
6249
6250 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
6251 "get_nd(outport, xxreg0); next;");
6252 }
6253
6254 /* Logical router ingress table 9: Gateway redirect.
6255 *
6256 * For traffic with outport equal to the l3dgw_port
6257 * on a distributed router, this table redirects a subset
6258 * of the traffic to the l3redirect_port which represents
6259 * the central instance of the l3dgw_port.
6260 */
6261 HMAP_FOR_EACH (od, key_node, datapaths) {
6262 if (!od->nbr) {
6263 continue;
6264 }
6265 if (od->l3dgw_port && od->l3redirect_port) {
6266 /* For traffic with outport == l3dgw_port, if the
6267 * packet did not match any higher priority redirect
6268 * rule, then the traffic is redirected to the central
6269 * instance of the l3dgw_port. */
6270 ds_clear(&match);
6271 ds_put_format(&match, "outport == %s",
6272 od->l3dgw_port->json_key);
6273 ds_clear(&actions);
6274 ds_put_format(&actions, "outport = %s; next;",
6275 od->l3redirect_port->json_key);
6276 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
6277 ds_cstr(&match), ds_cstr(&actions));
6278
6279 /* If the Ethernet destination has not been resolved,
6280 * redirect to the central instance of the l3dgw_port.
6281 * Such traffic will be replaced by an ARP request or ND
6282 * Neighbor Solicitation in the ARP request ingress
6283 * table, before being redirected to the central instance.
6284 */
6285 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
6286 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
6287 ds_cstr(&match), ds_cstr(&actions));
6288 }
6289
6290 /* Packets are allowed by default. */
6291 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
6292 }
6293
6294 /* Local router ingress table 10: ARP request.
6295 *
6296 * In the common case where the Ethernet destination has been resolved,
6297 * this table outputs the packet (priority 0). Otherwise, it composes
6298 * and sends an ARP/IPv6 NA request (priority 100). */
6299 HMAP_FOR_EACH (od, key_node, datapaths) {
6300 if (!od->nbr) {
6301 continue;
6302 }
6303
6304 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6305 "eth.dst == 00:00:00:00:00:00",
6306 "arp { "
6307 "eth.dst = ff:ff:ff:ff:ff:ff; "
6308 "arp.spa = reg1; "
6309 "arp.tpa = reg0; "
6310 "arp.op = 1; " /* ARP request */
6311 "output; "
6312 "};");
6313 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6314 "eth.dst == 00:00:00:00:00:00",
6315 "nd_ns { "
6316 "nd.target = xxreg0; "
6317 "output; "
6318 "};");
6319 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
6320 }
6321
6322 /* Logical router egress table 1: Delivery (priority 100).
6323 *
6324 * Priority 100 rules deliver packets to enabled logical ports. */
6325 HMAP_FOR_EACH (op, key_node, ports) {
6326 if (!op->nbrp) {
6327 continue;
6328 }
6329
6330 if (!lrport_is_enabled(op->nbrp)) {
6331 /* Drop packets to disabled logical ports (since logical flow
6332 * tables are default-drop). */
6333 continue;
6334 }
6335
6336 if (op->derived) {
6337 /* No egress packets should be processed in the context of
6338 * a chassisredirect port. The chassisredirect port should
6339 * be replaced by the l3dgw port in the local output
6340 * pipeline stage before egress processing. */
6341 continue;
6342 }
6343
6344 ds_clear(&match);
6345 ds_put_format(&match, "outport == %s", op->json_key);
6346 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
6347 ds_cstr(&match), "output;");
6348 }
6349
6350 ds_destroy(&match);
6351 ds_destroy(&actions);
6352 }
6353
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports, struct hmap *port_groups)
{
    /* Build the full desired set of logical flows and multicast groups in
     * memory first, then reconcile the database against it below. */
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    build_lswitch_flows(datapaths, ports, port_groups, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database.
     *
     * Walk the existing southbound rows: a row whose datapath no longer
     * exists, or that matches no desired flow, is deleted.  A row that does
     * match a desired flow is kept as-is and the matching in-memory flow is
     * removed from 'lflows', so that afterwards 'lflows' contains only the
     * flows that still need to be inserted. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        /* Reconstruct the stage from the row's datapath type, pipeline
         * name, and table id so it can be looked up in 'lflows'. */
        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions, sbflow->hash);
        if (lflow) {
            /* Database row already matches a desired flow; drop the
             * in-memory copy so it is not inserted again. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }

    /* Whatever remains in 'lflows' is new: insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(sbflow, pipeline);
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Trim the source locator lflow->where, which looks something like
         * "ovn/northd/ovn-northd.c:1234", down to just the part following the
         * last slash, e.g. "ovn-northd.c:1234". */
        const char *slash = strrchr(lflow->where, '/');
#if _WIN32
        const char *backslash = strrchr(lflow->where, '\\');
        if (!slash || backslash > slash) {
            slash = backslash;
        }
#endif
        const char *where = slash ? slash + 1 : lflow->where;

        /* Record stage name and source location in external_ids to aid
         * debugging of the generated flows. */
        struct smap ids = SMAP_INITIALIZER(&ids);
        smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
        smap_add(&ids, "source", where);
        if (lflow->stage_hint) {
            smap_add(&ids, "stage-hint", lflow->stage_hint);
        }
        sbrec_logical_flow_set_external_ids(sbflow, &ids);
        smap_destroy(&ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database, using the same
     * reconcile-then-insert pattern as for logical flows above. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Existing row: refresh its port set, then drop the in-memory
             * copy so it is not inserted again below. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    /* Insert the multicast groups that did not match any existing row. */
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
6457
6458 static void
6459 sync_address_set(struct northd_context *ctx, const char *name,
6460 const char **addrs, size_t n_addrs,
6461 struct shash *sb_address_sets)
6462 {
6463 const struct sbrec_address_set *sb_address_set;
6464 sb_address_set = shash_find_and_delete(sb_address_sets,
6465 name);
6466 if (!sb_address_set) {
6467 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
6468 sbrec_address_set_set_name(sb_address_set, name);
6469 }
6470
6471 sbrec_address_set_set_addresses(sb_address_set,
6472 addrs, n_addrs);
6473 }
6474
6475 /* OVN_Southbound Address_Set table contains same records as in north
6476 * bound, plus the records generated from Port_Group table in north bound.
6477 *
6478 * There are 2 records generated from each port group, one for IPv4, and
6479 * one for IPv6, named in the format: <port group name>_ip4 and
6480 * <port group name>_ip6 respectively. MAC addresses are ignored.
6481 *
6482 * We always update OVN_Southbound to match the Address_Set and Port_Group
6483 * in OVN_Northbound, so that the address sets used in Logical_Flows in
6484 * OVN_Southbound is checked against the proper set.*/
6485 static void
6486 sync_address_sets(struct northd_context *ctx)
6487 {
6488 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
6489
6490 const struct sbrec_address_set *sb_address_set;
6491 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
6492 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
6493 }
6494
6495 /* sync port group generated address sets first */
6496 const struct nbrec_port_group *nb_port_group;
6497 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6498 char **ipv4_addrs = xcalloc(1, sizeof *ipv4_addrs);
6499 size_t n_ipv4_addrs = 0;
6500 size_t n_ipv4_addrs_buf = 1;
6501 char **ipv6_addrs = xcalloc(1, sizeof *ipv6_addrs);
6502 size_t n_ipv6_addrs = 0;
6503 size_t n_ipv6_addrs_buf = 1;
6504 for (size_t i = 0; i < nb_port_group->n_ports; i++) {
6505 for (size_t j = 0; j < nb_port_group->ports[i]->n_addresses; j++) {
6506 struct lport_addresses laddrs;
6507 extract_lsp_addresses(nb_port_group->ports[i]->addresses[j],
6508 &laddrs);
6509 while (n_ipv4_addrs_buf < n_ipv4_addrs + laddrs.n_ipv4_addrs) {
6510 n_ipv4_addrs_buf *= 2;
6511 ipv4_addrs = xrealloc(ipv4_addrs,
6512 n_ipv4_addrs_buf * sizeof *ipv4_addrs);
6513 }
6514 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
6515 ipv4_addrs[n_ipv4_addrs++] =
6516 xstrdup(laddrs.ipv4_addrs[k].addr_s);
6517 }
6518 while (n_ipv6_addrs_buf < n_ipv6_addrs + laddrs.n_ipv6_addrs) {
6519 n_ipv6_addrs_buf *= 2;
6520 ipv6_addrs = xrealloc(ipv6_addrs,
6521 n_ipv6_addrs_buf * sizeof *ipv6_addrs);
6522 }
6523 for (size_t k = 0; k < laddrs.n_ipv6_addrs; k++) {
6524 ipv6_addrs[n_ipv6_addrs++] =
6525 xstrdup(laddrs.ipv6_addrs[k].addr_s);
6526 }
6527 destroy_lport_addresses(&laddrs);
6528 }
6529 }
6530 char *ipv4_addrs_name = xasprintf("%s_ip4", nb_port_group->name);
6531 char *ipv6_addrs_name = xasprintf("%s_ip6", nb_port_group->name);
6532 sync_address_set(ctx, ipv4_addrs_name, (const char **)ipv4_addrs,
6533 n_ipv4_addrs, &sb_address_sets);
6534 sync_address_set(ctx, ipv6_addrs_name, (const char **)ipv6_addrs,
6535 n_ipv6_addrs, &sb_address_sets);
6536 free(ipv4_addrs_name);
6537 free(ipv6_addrs_name);
6538 for (size_t i = 0; i < n_ipv4_addrs; i++) {
6539 free(ipv4_addrs[i]);
6540 }
6541 free(ipv4_addrs);
6542 for (size_t i = 0; i < n_ipv6_addrs; i++) {
6543 free(ipv6_addrs[i]);
6544 }
6545 free(ipv6_addrs);
6546 }
6547
6548 /* sync user defined address sets, which may overwrite port group
6549 * generated address sets if same name is used */
6550 const struct nbrec_address_set *nb_address_set;
6551 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
6552 sync_address_set(ctx, nb_address_set->name,
6553 /* "char **" is not compatible with "const char **" */
6554 (const char **)nb_address_set->addresses,
6555 nb_address_set->n_addresses, &sb_address_sets);
6556 }
6557
6558 struct shash_node *node, *next;
6559 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
6560 sbrec_address_set_delete(node->data);
6561 shash_delete(&sb_address_sets, node);
6562 }
6563 shash_destroy(&sb_address_sets);
6564 }
6565
6566 /* Each port group in Port_Group table in OVN_Northbound has a corresponding
6567 * entry in Port_Group table in OVN_Southbound. In OVN_Northbound the entries
6568 * contains lport uuids, while in OVN_Southbound we store the lport names.
6569 */
6570 static void
6571 sync_port_groups(struct northd_context *ctx)
6572 {
6573 struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups);
6574
6575 const struct sbrec_port_group *sb_port_group;
6576 SBREC_PORT_GROUP_FOR_EACH (sb_port_group, ctx->ovnsb_idl) {
6577 shash_add(&sb_port_groups, sb_port_group->name, sb_port_group);
6578 }
6579
6580 const struct nbrec_port_group *nb_port_group;
6581 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6582 sb_port_group = shash_find_and_delete(&sb_port_groups,
6583 nb_port_group->name);
6584 if (!sb_port_group) {
6585 sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn);
6586 sbrec_port_group_set_name(sb_port_group, nb_port_group->name);
6587 }
6588
6589 const char **nb_port_names = xcalloc(nb_port_group->n_ports,
6590 sizeof *nb_port_names);
6591 int i;
6592 for (i = 0; i < nb_port_group->n_ports; i++) {
6593 nb_port_names[i] = nb_port_group->ports[i]->name;
6594 }
6595 sbrec_port_group_set_ports(sb_port_group,
6596 nb_port_names,
6597 nb_port_group->n_ports);
6598 free(nb_port_names);
6599 }
6600
6601 struct shash_node *node, *next;
6602 SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) {
6603 sbrec_port_group_delete(node->data);
6604 shash_delete(&sb_port_groups, node);
6605 }
6606 shash_destroy(&sb_port_groups);
6607 }
6608
/*
 * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
 * and Southbound db.
 */
struct dns_info {
    struct hmap_node hmap_node; /* Node in sync_dns_entries()'s dns_map,
                                 * hashed on the NB record's row UUID. */
    const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns; /* DNS record in the Southbound db. */

    /* Datapaths to which the DNS entry is associated with it. */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;               /* Number of elements in 'sbs'. */
};
6622
6623 static inline struct dns_info *
6624 get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
6625 {
6626 struct dns_info *dns_info;
6627 size_t hash = uuid_hash(uuid);
6628 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
6629 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
6630 return dns_info;
6631 }
6632 }
6633
6634 return NULL;
6635 }
6636
6637 static void
6638 sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
6639 {
6640 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
6641 struct ovn_datapath *od;
6642 HMAP_FOR_EACH (od, key_node, datapaths) {
6643 if (!od->nbs || !od->nbs->n_dns_records) {
6644 continue;
6645 }
6646
6647 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
6648 struct dns_info *dns_info = get_dns_info_from_hmap(
6649 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
6650 if (!dns_info) {
6651 size_t hash = uuid_hash(
6652 &od->nbs->dns_records[i]->header_.uuid);
6653 dns_info = xzalloc(sizeof *dns_info);;
6654 dns_info->nb_dns = od->nbs->dns_records[i];
6655 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
6656 }
6657
6658 dns_info->n_sbs++;
6659 dns_info->sbs = xrealloc(dns_info->sbs,
6660 dns_info->n_sbs * sizeof *dns_info->sbs);
6661 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
6662 }
6663 }
6664
6665 const struct sbrec_dns *sbrec_dns, *next;
6666 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
6667 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
6668 struct uuid dns_uuid;
6669 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
6670 sbrec_dns_delete(sbrec_dns);
6671 continue;
6672 }
6673
6674 struct dns_info *dns_info =
6675 get_dns_info_from_hmap(&dns_map, &dns_uuid);
6676 if (dns_info) {
6677 dns_info->sb_dns = sbrec_dns;
6678 } else {
6679 sbrec_dns_delete(sbrec_dns);
6680 }
6681 }
6682
6683 struct dns_info *dns_info;
6684 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
6685 if (!dns_info->sb_dns) {
6686 sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
6687 dns_info->sb_dns = sbrec_dns;
6688 char *dns_id = xasprintf(
6689 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
6690 const struct smap external_ids =
6691 SMAP_CONST1(&external_ids, "dns_id", dns_id);
6692 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
6693 free(dns_id);
6694 }
6695
6696 /* Set the datapaths and records. If nothing has changed, then
6697 * this will be a no-op.
6698 */
6699 sbrec_dns_set_datapaths(
6700 dns_info->sb_dns,
6701 (struct sbrec_datapath_binding **)dns_info->sbs,
6702 dns_info->n_sbs);
6703 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
6704 free(dns_info->sbs);
6705 free(dns_info);
6706 }
6707 hmap_destroy(&dns_map);
6708 }
6709
6710
6711 \f
/* Rebuilds the northbound-derived content of the southbound database:
 * datapaths, ports, IPAM assignments, port groups, logical flows, address
 * sets and DNS records, and copies nb_cfg from NB_Global to SB_Global. */
static void
ovnnb_db_run(struct northd_context *ctx,
             struct ovsdb_idl_index *sbrec_chassis_by_name,
             struct ovsdb_idl_loop *sb_loop)
{
    /* Both transactions must be open before anything can be written. */
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }
    struct hmap datapaths, ports, port_groups;
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, sbrec_chassis_by_name, &datapaths, &ports);
    build_ipam(&datapaths, &ports);
    build_port_group_lswitches(ctx, &port_groups, &ports);
    build_lflows(ctx, &datapaths, &ports, &port_groups);

    sync_address_sets(ctx);
    sync_port_groups(ctx);
    sync_dns_entries(ctx, &datapaths);

    /* Tear down the per-iteration indexes built above. */
    struct ovn_port_group *pg, *next_pg;
    HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
        ovn_port_group_destroy(&port_groups, pg);
    }
    hmap_destroy(&port_groups);

    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    cleanup_macam(&macam);
}
6765
6766 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
6767 * this column is not empty, it means we need to set the corresponding logical
6768 * port as 'up' in the northbound DB. */
6769 static void
6770 update_logical_port_status(struct northd_context *ctx)
6771 {
6772 struct hmap lports_hmap;
6773 const struct sbrec_port_binding *sb;
6774 const struct nbrec_logical_switch_port *nbsp;
6775
6776 struct lport_hash_node {
6777 struct hmap_node node;
6778 const struct nbrec_logical_switch_port *nbsp;
6779 } *hash_node;
6780
6781 hmap_init(&lports_hmap);
6782
6783 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
6784 hash_node = xzalloc(sizeof *hash_node);
6785 hash_node->nbsp = nbsp;
6786 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
6787 }
6788
6789 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
6790 nbsp = NULL;
6791 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
6792 hash_string(sb->logical_port, 0),
6793 &lports_hmap) {
6794 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
6795 nbsp = hash_node->nbsp;
6796 break;
6797 }
6798 }
6799
6800 if (!nbsp) {
6801 /* The logical port doesn't exist for this port binding. This can
6802 * happen under normal circumstances when ovn-northd hasn't gotten
6803 * around to pruning the Port_Binding yet. */
6804 continue;
6805 }
6806
6807 bool up = (sb->chassis || !strcmp(nbsp->type, "router"));
6808 if (!nbsp->up || *nbsp->up != up) {
6809 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
6810 }
6811 }
6812
6813 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
6814 free(hash_node);
6815 }
6816 hmap_destroy(&lports_hmap);
6817 }
6818
/* DHCPv4 options that ovn-northd supports; mirrored into the southbound
 * DHCP_Options table by check_and_add_supported_dhcp_opts_to_sb_db(). */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
6845
/* DHCPv6 options that ovn-northd supports; mirrored into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db(). */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
6852
6853 static void
6854 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
6855 {
6856 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
6857 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
6858 sizeof(supported_dhcp_opts[0])); i++) {
6859 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
6860 dhcp_opt_hash(supported_dhcp_opts[i].name));
6861 }
6862
6863 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
6864 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
6865 struct gen_opts_map *dhcp_opt =
6866 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
6867 if (dhcp_opt) {
6868 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
6869 } else {
6870 sbrec_dhcp_options_delete(opt_row);
6871 }
6872 }
6873
6874 struct gen_opts_map *opt;
6875 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
6876 struct sbrec_dhcp_options *sbrec_dhcp_option =
6877 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
6878 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
6879 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
6880 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
6881 }
6882
6883 hmap_destroy(&dhcp_opts_to_add);
6884 }
6885
6886 static void
6887 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
6888 {
6889 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
6890 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
6891 sizeof(supported_dhcpv6_opts[0])); i++) {
6892 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
6893 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
6894 }
6895
6896 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
6897 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
6898 struct gen_opts_map *dhcp_opt =
6899 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
6900 if (dhcp_opt) {
6901 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
6902 } else {
6903 sbrec_dhcpv6_options_delete(opt_row);
6904 }
6905 }
6906
6907 struct gen_opts_map *opt;
6908 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
6909 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
6910 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
6911 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
6912 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
6913 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
6914 }
6915
6916 hmap_destroy(&dhcpv6_opts_to_add);
6917 }
6918
/* Per-table column sets used to build the RBAC permissions for the
 * "ovn-controller" role (wired together in rbac_perm_cfg[] below):
 * "*_auth" lists the authorization columns, "*_update" the columns the
 * role may modify.
 *
 * NOTE(review): the empty string in the Port_Binding/MAC_Binding auth
 * arrays appears to mean "no authorization check" — confirm against the
 * ovn-sb RBAC documentation. */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};
6938
/* Expected RBAC permission configuration, one entry per southbound table,
 * terminated by an all-NULL sentinel.  check_and_update_rbac() compares the
 * database against this table and repairs any differences. */
static struct rbac_perm_cfg {
    const char *table;          /* Southbound table name. */
    const char **auth;          /* Authorization columns. */
    int n_auth;                 /* Number of elements in 'auth'. */
    bool insdel;                /* May the role insert/delete rows? */
    const char **update;        /* Columns the role may update. */
    int n_update;               /* Number of elements in 'update'. */
    /* Matching SB row found during validation; reset each run. */
    const struct sbrec_rbac_permission *row;
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        /* Sentinel: marks the end of the table. */
        .table = NULL,
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
6990
6991 static bool
6992 ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
6993 {
6994 struct rbac_perm_cfg *pcfg;
6995 int i, j, n_found;
6996
6997 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
6998 if (!strcmp(perm->table, pcfg->table)) {
6999 break;
7000 }
7001 }
7002 if (!pcfg->table) {
7003 return false;
7004 }
7005 if (perm->n_authorization != pcfg->n_auth ||
7006 perm->n_update != pcfg->n_update) {
7007 return false;
7008 }
7009 if (perm->insert_delete != pcfg->insdel) {
7010 return false;
7011 }
7012 /* verify perm->authorization vs. pcfg->auth */
7013 n_found = 0;
7014 for (i = 0; i < pcfg->n_auth; i++) {
7015 for (j = 0; j < perm->n_authorization; j++) {
7016 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
7017 n_found++;
7018 break;
7019 }
7020 }
7021 }
7022 if (n_found != pcfg->n_auth) {
7023 return false;
7024 }
7025
7026 /* verify perm->update vs. pcfg->update */
7027 n_found = 0;
7028 for (i = 0; i < pcfg->n_update; i++) {
7029 for (j = 0; j < perm->n_update; j++) {
7030 if (!strcmp(pcfg->update[i], perm->update[j])) {
7031 n_found++;
7032 break;
7033 }
7034 }
7035 }
7036 if (n_found != pcfg->n_update) {
7037 return false;
7038 }
7039
7040 /* Success, db state matches expected state */
7041 pcfg->row = perm;
7042 return true;
7043 }
7044
7045 static void
7046 ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
7047 struct northd_context *ctx,
7048 const struct sbrec_rbac_role *rbac_role)
7049 {
7050 struct sbrec_rbac_permission *rbac_perm;
7051
7052 rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
7053 sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
7054 sbrec_rbac_permission_set_authorization(rbac_perm,
7055 pcfg->auth,
7056 pcfg->n_auth);
7057 sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
7058 sbrec_rbac_permission_set_update(rbac_perm,
7059 pcfg->update,
7060 pcfg->n_update);
7061 sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
7062 rbac_perm);
7063 }
7064
7065 static void
7066 check_and_update_rbac(struct northd_context *ctx)
7067 {
7068 const struct sbrec_rbac_role *rbac_role = NULL;
7069 const struct sbrec_rbac_permission *perm_row, *perm_next;
7070 const struct sbrec_rbac_role *role_row, *role_row_next;
7071 struct rbac_perm_cfg *pcfg;
7072
7073 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7074 pcfg->row = NULL;
7075 }
7076
7077 SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
7078 if (!ovn_rbac_validate_perm(perm_row)) {
7079 sbrec_rbac_permission_delete(perm_row);
7080 }
7081 }
7082 SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
7083 if (strcmp(role_row->name, "ovn-controller")) {
7084 sbrec_rbac_role_delete(role_row);
7085 } else {
7086 rbac_role = role_row;
7087 }
7088 }
7089
7090 if (!rbac_role) {
7091 rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
7092 sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
7093 }
7094
7095 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7096 if (!pcfg->row) {
7097 ovn_rbac_create_perm(pcfg, ctx, rbac_role);
7098 }
7099 }
7100 }
7101
7102 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
7103 static void
7104 update_northbound_cfg(struct northd_context *ctx,
7105 struct ovsdb_idl_loop *sb_loop)
7106 {
7107 /* Update northbound sb_cfg if appropriate. */
7108 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
7109 int64_t sb_cfg = sb_loop->cur_cfg;
7110 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
7111 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
7112 }
7113
7114 /* Update northbound hv_cfg if appropriate. */
7115 if (nbg) {
7116 /* Find minimum nb_cfg among all chassis. */
7117 const struct sbrec_chassis *chassis;
7118 int64_t hv_cfg = nbg->nb_cfg;
7119 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
7120 if (chassis->nb_cfg < hv_cfg) {
7121 hv_cfg = chassis->nb_cfg;
7122 }
7123 }
7124
7125 /* Update hv_cfg. */
7126 if (nbg->hv_cfg != hv_cfg) {
7127 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
7128 }
7129 }
7130 }
7131
/* Handle a fairly small set of changes in the southbound database. */
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    /* Require an open northbound transaction and at least one successful
     * southbound connection before acting on southbound contents. */
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}
7143 \f
/* Parses the command line, filling in the module-level ovnsb_db, ovnnb_db
 * and unixctl_path variables.  Exits directly for --help/--options/--version.
 * Daemon, vlog, and SSL options are handled by the corresponding
 * DAEMON_*/VLOG_*/STREAM_SSL_* macros. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"unixctl", required_argument, NULL, 'u'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Macro-expanded case labels for daemon/vlog/SSL options. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'u':
            unixctl_path = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the default database locations when not given. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
7218
/* Adds 'column' to the set of columns replicated by 'idl' but suppresses
 * change alerts for it: ovn-northd writes these columns itself and does not
 * need to wake up when they change. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
7226
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    /* Basic process setup: signals, proctitle, (Windows) service hooks,
     * command-line options, then daemonization and the unixctl server. */
    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(unixctl_path, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg/hv_cfg are written by ovn-northd itself (see
     * update_northbound_cfg()), so don't wake up on them. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db.
     * Register each table and column explicitly; columns that ovn-northd
     * itself writes go through add_column_noalert(). */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* Chassis/gateway-chassis columns use plain add_column: changes to them
     * (made by ovn-controller) must wake ovn-northd up. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_port_binding_col_gateway_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_priority);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_external_ids);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_options);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_external_ids);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_group);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_table);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_authorization);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_insert_delete);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);

    struct ovsdb_idl_index *sbrec_chassis_by_name
        = chassis_index_create(ovnsb_idl_loop.idl);

    /* Ensure that only a single ovn-northd is active in the deployment by
     * acquiring a lock called "ovn_northd" on the southbound database
     * and then only performing DB transactions if the lock is held. */
    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
    bool had_lock = false;

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        /* Log active/standby transitions. */
        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock acquired. "
                      "This ovn-northd instance is now active.");
            had_lock = true;
        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock lost. "
                      "This ovn-northd instance is now on standby.");
            had_lock = false;
        }

        /* Only the instance holding the lock processes the databases. */
        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            ovnnb_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
            ovnsb_db_run(&ctx, &ovnsb_idl_loop);
            if (ctx.ovnsb_txn) {
                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
                check_and_update_rbac(&ctx);
            }
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7418
7419 static void
7420 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
7421 const char *argv[] OVS_UNUSED, void *exiting_)
7422 {
7423 bool *exiting = exiting_;
7424 *exiting = true;
7425
7426 unixctl_command_reply(conn, NULL);
7427 }