]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
ovn-northd: Improve hashing for chassis queues.
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
38 #include "packets.h"
39 #include "openvswitch/poll-loop.h"
40 #include "smap.h"
41 #include "sset.h"
42 #include "svec.h"
43 #include "stream.h"
44 #include "stream-ssl.h"
45 #include "unixctl.h"
46 #include "util.h"
47 #include "uuid.h"
48 #include "openvswitch/vlog.h"
49
50 VLOG_DEFINE_THIS_MODULE(ovn_northd);
51
52 static unixctl_cb_func ovn_northd_exit;
53
/* Bundles the northbound and southbound database IDL handles together with
 * one transaction handle for each of them. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database IDL. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database IDL. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Transaction on 'ovnnb_idl'. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Transaction on 'ovnsb_idl'. */
};
60
61 static const char *ovnnb_db;
62 static const char *ovnsb_db;
63 static const char *unixctl_path;
64
65 #define MAC_ADDR_PREFIX 0x0A0000000000ULL
66 #define MAC_ADDR_SPACE 0xffffff
67
68 /* MAC address management (macam) table of "struct eth_addr"s, that holds the
69 * MAC addresses allocated by the OVN ipam module. */
70 static struct hmap macam = HMAP_INITIALIZER(&macam);
71
72 #define MAX_OVN_TAGS 4096
73 \f
74 /* Pipeline stages. */
75
/* The two pipelines in an OVN logical flow table.
 *
 * The numeric values matter: OVN_STAGE_BUILD() shifts them into bit 8 of an
 * "enum ovn_stage". */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};
81
/* The two purposes for which ovn-northd uses OVN logical datapaths.
 *
 * The numeric values matter: OVN_STAGE_BUILD() shifts them into bit 9 of an
 * "enum ovn_stage". */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
87
/* Packs a datapath type, a pipeline and a table number into the integer value
 * of an "enum ovn_stage": bit 9 holds DP_TYPE, bit 8 holds PIPELINE and the
 * low 8 bits hold TABLE.
 *
 * (ovn_stage_build() is the type-safe equivalent and is preferable where a
 * function call is allowed; this macro exists because inline functions cannot
 * be used in enum definitions or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
94
95 /* A stage within an OVN logical switch or router.
96 *
97 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
98 * or router, whether the stage is part of the ingress or egress pipeline, and
99 * the table within that pipeline. The first three components are combined to
100 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
101 * S_ROUTER_OUT_DELIVERY. */
102 enum ovn_stage {
103 #define PIPELINE_STAGES \
104 /* Logical switch ingress stages. */ \
105 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
106 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
107 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
108 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
109 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
110 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
111 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
112 PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
113 PIPELINE_STAGE(SWITCH, IN, QOS_METER, 8, "ls_in_qos_meter") \
114 PIPELINE_STAGE(SWITCH, IN, LB, 9, "ls_in_lb") \
115 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 10, "ls_in_stateful") \
116 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 11, "ls_in_arp_rsp") \
117 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 12, "ls_in_dhcp_options") \
118 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 13, "ls_in_dhcp_response") \
119 PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 14, "ls_in_dns_lookup") \
120 PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 15, "ls_in_dns_response") \
121 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 16, "ls_in_l2_lkup") \
122 \
123 /* Logical switch egress stages. */ \
124 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
125 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
126 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
127 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
128 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
129 PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
130 PIPELINE_STAGE(SWITCH, OUT, QOS_METER, 6, "ls_out_qos_meter") \
131 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 7, "ls_out_stateful") \
132 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 8, "ls_out_port_sec_ip") \
133 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 9, "ls_out_port_sec_l2") \
134 \
135 /* Logical router ingress stages. */ \
136 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
137 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
138 PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
139 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
140 PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
141 PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 5, "lr_in_nd_ra_options") \
142 PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
143 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \
144 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 8, "lr_in_arp_resolve") \
145 PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 9, "lr_in_gw_redirect") \
146 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 10, "lr_in_arp_request") \
147 \
148 /* Logical router egress stages. */ \
149 PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
150 PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \
151 PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \
152 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery")
153
154 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
155 S_##DP_TYPE##_##PIPELINE##_##STAGE \
156 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
157 PIPELINE_STAGES
158 #undef PIPELINE_STAGE
159 };
160
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
166
/* Register definitions specific to switches.  Each macro names a single bit
 * of logical register reg0. */
#define REGBIT_CONNTRACK_DEFRAG  "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT  "reg0[1]"
#define REGBIT_CONNTRACK_NAT     "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT  "reg0[3]"
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
#define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Indicates that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets whose source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
182
183 /* Returns an "enum ovn_stage" built from the arguments. */
184 static enum ovn_stage
185 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
186 uint8_t table)
187 {
188 return OVN_STAGE_BUILD(dp_type, pipeline, table);
189 }
190
191 /* Returns the pipeline to which 'stage' belongs. */
192 static enum ovn_pipeline
193 ovn_stage_get_pipeline(enum ovn_stage stage)
194 {
195 return (stage >> 8) & 1;
196 }
197
198 /* Returns the pipeline name to which 'stage' belongs. */
199 static const char *
200 ovn_stage_get_pipeline_name(enum ovn_stage stage)
201 {
202 return ovn_stage_get_pipeline(stage) == P_IN ? "ingress" : "egress";
203 }
204
205 /* Returns the table to which 'stage' belongs. */
206 static uint8_t
207 ovn_stage_get_table(enum ovn_stage stage)
208 {
209 return stage & 0xff;
210 }
211
212 /* Returns a string name for 'stage'. */
213 static const char *
214 ovn_stage_to_str(enum ovn_stage stage)
215 {
216 switch (stage) {
217 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
218 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
219 PIPELINE_STAGES
220 #undef PIPELINE_STAGE
221 default: return "<unknown>";
222 }
223 }
224
225 /* Returns the type of the datapath to which a flow with the given 'stage' may
226 * be added. */
227 static enum ovn_datapath_type
228 ovn_stage_to_datapath_type(enum ovn_stage stage)
229 {
230 switch (stage) {
231 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
232 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
233 PIPELINE_STAGES
234 #undef PIPELINE_STAGE
235 default: OVS_NOT_REACHED();
236 }
237 }
238 \f
/* Prints the daemon's usage summary with printf(), filling in the program
 * name and the default northbound and southbound database locations, and then
 * calls the daemon, vlog and stream libraries' own usage printers. */
239 static void
240 usage(void)
241 {
242 printf("\
243 %s: OVN northbound management daemon\n\
244 usage: %s [OPTIONS]\n\
245 \n\
246 Options:\n\
247 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
248 (default: %s)\n\
249 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
250 (default: %s)\n\
251 --unixctl=SOCKET override default control socket name\n\
252 -h, --help display this help message\n\
253 -o, --options list available options\n\
254 -V, --version display version information\n\
255 ", program_name, program_name, default_nb_db(), default_sb_db());
256 daemon_usage();
257 vlog_usage();
258 stream_usage("database", true, true, false);
259 }
260 \f
261 struct tnlid_node {
262 struct hmap_node hmap_node;
263 uint32_t tnlid;
264 };
265
266 static void
267 destroy_tnlids(struct hmap *tnlids)
268 {
269 struct tnlid_node *node;
270 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
271 free(node);
272 }
273 hmap_destroy(tnlids);
274 }
275
276 static void
277 add_tnlid(struct hmap *set, uint32_t tnlid)
278 {
279 struct tnlid_node *node = xmalloc(sizeof *node);
280 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
281 node->tnlid = tnlid;
282 }
283
284 static bool
285 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
286 {
287 const struct tnlid_node *node;
288 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
289 if (node->tnlid == tnlid) {
290 return true;
291 }
292 }
293 return false;
294 }
295
/* Returns the tunnel ID that follows 'tnlid' in the circular range
 * [1, 'max']: 'tnlid' + 1 if that does not exceed 'max', otherwise 1.
 *
 * The result is never 0, which allocate_tnlid() reserves to mean "no ID
 * available".  The comparison is written as 'tnlid < max' rather than
 * 'tnlid + 1 <= max' so that 'tnlid' == UINT32_MAX cannot wrap around to 0. */
static uint32_t
next_tnlid(uint32_t tnlid, uint32_t max)
{
    return tnlid < max ? tnlid + 1 : 1;
}
301
302 static uint32_t
303 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
304 uint32_t *hint)
305 {
306 for (uint32_t tnlid = next_tnlid(*hint, max); tnlid != *hint;
307 tnlid = next_tnlid(tnlid, max)) {
308 if (!tnlid_in_use(set, tnlid)) {
309 add_tnlid(set, tnlid);
310 *hint = tnlid;
311 return tnlid;
312 }
313 }
314
315 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
316 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
317 return 0;
318 }
319 \f
320 struct ovn_chassis_qdisc_queues {
321 struct hmap_node key_node;
322 uint32_t queue_id;
323 struct uuid chassis_uuid;
324 };
325
/* Returns the hash used to index a (chassis, queue ID) pair, combining the
 * hash of 'chassis_uuid' with 'queue_id'. */
static uint32_t
hash_chassis_queue(const struct uuid *chassis_uuid, uint32_t queue_id)
{
    uint32_t chassis_hash = uuid_hash(chassis_uuid);

    return hash_2words(chassis_hash, queue_id);
}
331
332 static void
333 destroy_chassis_queues(struct hmap *set)
334 {
335 struct ovn_chassis_qdisc_queues *node;
336 HMAP_FOR_EACH_POP (node, key_node, set) {
337 free(node);
338 }
339 hmap_destroy(set);
340 }
341
342 static void
343 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
344 uint32_t queue_id)
345 {
346 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
347 node->queue_id = queue_id;
348 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
349 hmap_insert(set, &node->key_node,
350 hash_chassis_queue(chassis_uuid, queue_id));
351 }
352
353 static bool
354 chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
355 uint32_t queue_id)
356 {
357 const struct ovn_chassis_qdisc_queues *node;
358 HMAP_FOR_EACH_WITH_HASH (node, key_node,
359 hash_chassis_queue(chassis_uuid, queue_id), set) {
360 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
361 && node->queue_id == queue_id) {
362 return true;
363 }
364 }
365 return false;
366 }
367
368 static uint32_t
369 allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
370 {
371 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
372 queue_id <= QDISC_MAX_QUEUE_ID;
373 queue_id++) {
374 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
375 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
376 return queue_id;
377 }
378 }
379
380 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
381 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
382 return 0;
383 }
384
385 static void
386 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
387 uint32_t queue_id)
388 {
389 const struct uuid *chassis_uuid = &chassis->header_.uuid;
390 struct ovn_chassis_qdisc_queues *node;
391 HMAP_FOR_EACH_WITH_HASH (node, key_node,
392 hash_chassis_queue(chassis_uuid, queue_id), set) {
393 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
394 && node->queue_id == queue_id) {
395 hmap_remove(set, &node->key_node);
396 break;
397 }
398 }
399 }
400
/* Returns true if 'opts' contains a "qos_max_rate" or a "qos_burst" key. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    if (smap_get(opts, "qos_max_rate")) {
        return true;
    }
    return smap_get(opts, "qos_burst") != NULL;
}
407 \f
408
409 struct ipam_info {
410 uint32_t start_ipv4;
411 size_t total_ipv4s;
412 unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
413 bool ipv6_prefix_set;
414 struct in6_addr ipv6_prefix;
415 };
416
417 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
418 * sb->external_ids:logical-switch. */
419 struct ovn_datapath {
420 struct hmap_node key_node; /* Index on 'key'. */
421 struct uuid key; /* (nbs/nbr)->header_.uuid. */
422
423 const struct nbrec_logical_switch *nbs; /* May be NULL. */
424 const struct nbrec_logical_router *nbr; /* May be NULL. */
425 const struct sbrec_datapath_binding *sb; /* May be NULL. */
426
427 struct ovs_list list; /* In list of similar records. */
428
429 /* Logical switch data. */
430 struct ovn_port **router_ports;
431 size_t n_router_ports;
432
433 struct hmap port_tnlids;
434 uint32_t port_key_hint;
435
436 bool has_unknown;
437
438 /* IPAM data. */
439 struct ipam_info ipam_info;
440
441 /* OVN northd only needs to know about the logical router gateway port for
442 * NAT on a distributed router. This "distributed gateway port" is
443 * populated only when there is a "redirect-chassis" specified for one of
444 * the ports on the logical router. Otherwise this will be NULL. */
445 struct ovn_port *l3dgw_port;
446 /* The "derived" OVN port representing the instance of l3dgw_port on
447 * the "redirect-chassis". */
448 struct ovn_port *l3redirect_port;
449 struct ovn_port *localnet_port;
450
451 /* Port groups related to the datapath, used only when nbs is NOT NULL. */
452 struct hmap nb_pgs;
453 };
454
455 struct macam_node {
456 struct hmap_node hmap_node;
457 struct eth_addr mac_addr; /* Allocated MAC address. */
458 };
459
460 static void
461 cleanup_macam(struct hmap *macam_)
462 {
463 struct macam_node *node;
464 HMAP_FOR_EACH_POP (node, hmap_node, macam_) {
465 free(node);
466 }
467 }
468
469 static struct ovn_datapath *
470 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
471 const struct nbrec_logical_switch *nbs,
472 const struct nbrec_logical_router *nbr,
473 const struct sbrec_datapath_binding *sb)
474 {
475 struct ovn_datapath *od = xzalloc(sizeof *od);
476 od->key = *key;
477 od->sb = sb;
478 od->nbs = nbs;
479 od->nbr = nbr;
480 hmap_init(&od->port_tnlids);
481 hmap_init(&od->nb_pgs);
482 od->port_key_hint = 0;
483 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
484 return od;
485 }
486
487 static void ovn_ls_port_group_destroy(struct hmap *nb_pgs);
488
489 static void
490 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
491 {
492 if (od) {
493 /* Don't remove od->list. It is used within build_datapaths() as a
494 * private list and once we've exited that function it is not safe to
495 * use it. */
496 hmap_remove(datapaths, &od->key_node);
497 destroy_tnlids(&od->port_tnlids);
498 bitmap_free(od->ipam_info.allocated_ipv4s);
499 free(od->router_ports);
500 ovn_ls_port_group_destroy(&od->nb_pgs);
501 free(od);
502 }
503 }
504
505 /* Returns 'od''s datapath type. */
506 static enum ovn_datapath_type
507 ovn_datapath_get_type(const struct ovn_datapath *od)
508 {
509 return od->nbs ? DP_SWITCH : DP_ROUTER;
510 }
511
512 static struct ovn_datapath *
513 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
514 {
515 struct ovn_datapath *od;
516
517 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
518 if (uuid_equals(uuid, &od->key)) {
519 return od;
520 }
521 }
522 return NULL;
523 }
524
525 static struct ovn_datapath *
526 ovn_datapath_from_sbrec(struct hmap *datapaths,
527 const struct sbrec_datapath_binding *sb)
528 {
529 struct uuid key;
530
531 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
532 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
533 return NULL;
534 }
535 return ovn_datapath_find(datapaths, &key);
536 }
537
538 static bool
539 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
540 {
541 return !lrouter->enabled || *lrouter->enabled;
542 }
543
544 static void
545 init_ipam_info_for_datapath(struct ovn_datapath *od)
546 {
547 if (!od->nbs) {
548 return;
549 }
550
551 const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
552 const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");
553
554 if (ipv6_prefix) {
555 od->ipam_info.ipv6_prefix_set = ipv6_parse(
556 ipv6_prefix, &od->ipam_info.ipv6_prefix);
557 }
558
559 if (!subnet_str) {
560 return;
561 }
562
563 ovs_be32 subnet, mask;
564 char *error = ip_parse_masked(subnet_str, &subnet, &mask);
565 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
566 static struct vlog_rate_limit rl
567 = VLOG_RATE_LIMIT_INIT(5, 1);
568 VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
569 free(error);
570 return;
571 }
572
573 od->ipam_info.start_ipv4 = ntohl(subnet) + 1;
574 od->ipam_info.total_ipv4s = ~ntohl(mask);
575 od->ipam_info.allocated_ipv4s =
576 bitmap_allocate(od->ipam_info.total_ipv4s);
577
578 /* Mark first IP as taken */
579 bitmap_set1(od->ipam_info.allocated_ipv4s, 0);
580
581 /* Check if there are any reserver IPs (list) to be excluded from IPAM */
582 const char *exclude_ip_list = smap_get(&od->nbs->other_config,
583 "exclude_ips");
584 if (!exclude_ip_list) {
585 return;
586 }
587
588 struct lexer lexer;
589 lexer_init(&lexer, exclude_ip_list);
590 /* exclude_ip_list could be in the format -
591 * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
592 */
593 lexer_get(&lexer);
594 while (lexer.token.type != LEX_T_END) {
595 if (lexer.token.type != LEX_T_INTEGER) {
596 lexer_syntax_error(&lexer, "expecting address");
597 break;
598 }
599 uint32_t start = ntohl(lexer.token.value.ipv4);
600 lexer_get(&lexer);
601
602 uint32_t end = start + 1;
603 if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
604 if (lexer.token.type != LEX_T_INTEGER) {
605 lexer_syntax_error(&lexer, "expecting address range");
606 break;
607 }
608 end = ntohl(lexer.token.value.ipv4) + 1;
609 lexer_get(&lexer);
610 }
611
612 /* Clamp start...end to fit the subnet. */
613 start = MAX(od->ipam_info.start_ipv4, start);
614 end = MIN(od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s, end);
615 if (end > start) {
616 bitmap_set_multiple(od->ipam_info.allocated_ipv4s,
617 start - od->ipam_info.start_ipv4,
618 end - start, 1);
619 } else {
620 lexer_error(&lexer, "excluded addresses not in subnet");
621 }
622 }
623 if (lexer.error) {
624 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
625 VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
626 UUID_ARGS(&od->key), lexer.error);
627 }
628 lexer_destroy(&lexer);
629 }
630
631 static void
632 ovn_datapath_update_external_ids(struct ovn_datapath *od)
633 {
634 /* Get the logical-switch or logical-router UUID to set in
635 * external-ids. */
636 char uuid_s[UUID_LEN + 1];
637 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
638 const char *key = od->nbs ? "logical-switch" : "logical-router";
639
640 /* Get names to set in external-ids. */
641 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
642 const char *name2 = (od->nbs
643 ? smap_get(&od->nbs->external_ids,
644 "neutron:network_name")
645 : smap_get(&od->nbr->external_ids,
646 "neutron:router_name"));
647
648 /* Set external-ids. */
649 struct smap ids = SMAP_INITIALIZER(&ids);
650 smap_add(&ids, key, uuid_s);
651 smap_add(&ids, "name", name);
652 if (name2 && name2[0]) {
653 smap_add(&ids, "name2", name2);
654 }
655 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
656 smap_destroy(&ids);
657 }
658
659 static void
660 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
661 struct ovs_list *sb_only, struct ovs_list *nb_only,
662 struct ovs_list *both)
663 {
664 hmap_init(datapaths);
665 ovs_list_init(sb_only);
666 ovs_list_init(nb_only);
667 ovs_list_init(both);
668
669 const struct sbrec_datapath_binding *sb, *sb_next;
670 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
671 struct uuid key;
672 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
673 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
674 ovsdb_idl_txn_add_comment(
675 ctx->ovnsb_txn,
676 "deleting Datapath_Binding "UUID_FMT" that lacks "
677 "external-ids:logical-switch and "
678 "external-ids:logical-router",
679 UUID_ARGS(&sb->header_.uuid));
680 sbrec_datapath_binding_delete(sb);
681 continue;
682 }
683
684 if (ovn_datapath_find(datapaths, &key)) {
685 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
686 VLOG_INFO_RL(
687 &rl, "deleting Datapath_Binding "UUID_FMT" with "
688 "duplicate external-ids:logical-switch/router "UUID_FMT,
689 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
690 sbrec_datapath_binding_delete(sb);
691 continue;
692 }
693
694 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
695 NULL, NULL, sb);
696 ovs_list_push_back(sb_only, &od->list);
697 }
698
699 const struct nbrec_logical_switch *nbs;
700 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
701 struct ovn_datapath *od = ovn_datapath_find(datapaths,
702 &nbs->header_.uuid);
703 if (od) {
704 od->nbs = nbs;
705 ovs_list_remove(&od->list);
706 ovs_list_push_back(both, &od->list);
707 ovn_datapath_update_external_ids(od);
708 } else {
709 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
710 nbs, NULL, NULL);
711 ovs_list_push_back(nb_only, &od->list);
712 }
713
714 init_ipam_info_for_datapath(od);
715 }
716
717 const struct nbrec_logical_router *nbr;
718 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
719 if (!lrouter_is_enabled(nbr)) {
720 continue;
721 }
722
723 struct ovn_datapath *od = ovn_datapath_find(datapaths,
724 &nbr->header_.uuid);
725 if (od) {
726 if (!od->nbs) {
727 od->nbr = nbr;
728 ovs_list_remove(&od->list);
729 ovs_list_push_back(both, &od->list);
730 ovn_datapath_update_external_ids(od);
731 } else {
732 /* Can't happen! */
733 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
734 VLOG_WARN_RL(&rl,
735 "duplicate UUID "UUID_FMT" in OVN_Northbound",
736 UUID_ARGS(&nbr->header_.uuid));
737 continue;
738 }
739 } else {
740 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
741 NULL, nbr, NULL);
742 ovs_list_push_back(nb_only, &od->list);
743 }
744 }
745 }
746
/* Allocates a datapath tunnel key in the range [1, 2**24 - 1] that is not in
 * 'dp_tnlids', adds it to 'dp_tnlids' and returns it.  Returns 0 if none is
 * free.
 *
 * The search hint is a function-local static, so successive calls continue
 * from the most recently allocated key. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
753
754 /* Updates the southbound Datapath_Binding table so that it contains the
755 * logical switches and routers specified by the northbound database.
756 *
757 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
758 * switch and router. */
759 static void
760 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
761 {
762 struct ovs_list sb_only, nb_only, both;
763
764 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
765
766 if (!ovs_list_is_empty(&nb_only)) {
767 /* First index the in-use datapath tunnel IDs. */
768 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
769 struct ovn_datapath *od;
770 LIST_FOR_EACH (od, list, &both) {
771 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
772 }
773
774 /* Add southbound record for each unmatched northbound record. */
775 LIST_FOR_EACH (od, list, &nb_only) {
776 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
777 if (!tunnel_key) {
778 break;
779 }
780
781 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
782 ovn_datapath_update_external_ids(od);
783 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
784 }
785 destroy_tnlids(&dp_tnlids);
786 }
787
788 /* Delete southbound records without northbound matches. */
789 struct ovn_datapath *od, *next;
790 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
791 ovs_list_remove(&od->list);
792 sbrec_datapath_binding_delete(od->sb);
793 ovn_datapath_destroy(datapaths, od);
794 }
795 }
796 \f
797 struct ovn_port {
798 struct hmap_node key_node; /* Index on 'key'. */
799 char *key; /* nbs->name, nbr->name, sb->logical_port. */
800 char *json_key; /* 'key', quoted for use in JSON. */
801
802 const struct sbrec_port_binding *sb; /* May be NULL. */
803
804 /* Logical switch port data. */
805 const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */
806
807 struct lport_addresses *lsp_addrs; /* Logical switch port addresses. */
808 unsigned int n_lsp_addrs;
809
810 struct lport_addresses *ps_addrs; /* Port security addresses. */
811 unsigned int n_ps_addrs;
812
813 /* Logical router port data. */
814 const struct nbrec_logical_router_port *nbrp; /* May be NULL. */
815
816 struct lport_addresses lrp_networks;
817
818 bool derived; /* Indicates whether this is an additional port
819 * derived from nbsp or nbrp. */
820
821 /* The port's peer:
822 *
823 * - A switch port S of type "router" has a router port R as a peer,
824 * and R in turn has S has its peer.
825 *
826 * - Two connected logical router ports have each other as peer. */
827 struct ovn_port *peer;
828
829 struct ovn_datapath *od;
830
831 struct ovs_list list; /* In list of similar records. */
832 };
833
834 static struct ovn_port *
835 ovn_port_create(struct hmap *ports, const char *key,
836 const struct nbrec_logical_switch_port *nbsp,
837 const struct nbrec_logical_router_port *nbrp,
838 const struct sbrec_port_binding *sb)
839 {
840 struct ovn_port *op = xzalloc(sizeof *op);
841
842 struct ds json_key = DS_EMPTY_INITIALIZER;
843 json_string_escape(key, &json_key);
844 op->json_key = ds_steal_cstr(&json_key);
845
846 op->key = xstrdup(key);
847 op->sb = sb;
848 op->nbsp = nbsp;
849 op->nbrp = nbrp;
850 op->derived = false;
851 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
852 return op;
853 }
854
855 static void
856 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
857 {
858 if (port) {
859 /* Don't remove port->list. It is used within build_ports() as a
860 * private list and once we've exited that function it is not safe to
861 * use it. */
862 hmap_remove(ports, &port->key_node);
863
864 for (int i = 0; i < port->n_lsp_addrs; i++) {
865 destroy_lport_addresses(&port->lsp_addrs[i]);
866 }
867 free(port->lsp_addrs);
868
869 for (int i = 0; i < port->n_ps_addrs; i++) {
870 destroy_lport_addresses(&port->ps_addrs[i]);
871 }
872 free(port->ps_addrs);
873
874 destroy_lport_addresses(&port->lrp_networks);
875 free(port->json_key);
876 free(port->key);
877 free(port);
878 }
879 }
880
881 static struct ovn_port *
882 ovn_port_find(struct hmap *ports, const char *name)
883 {
884 struct ovn_port *op;
885
886 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
887 if (!strcmp(op->key, name)) {
888 return op;
889 }
890 }
891 return NULL;
892 }
893
894 static uint32_t
895 ovn_port_allocate_key(struct ovn_datapath *od)
896 {
897 return allocate_tnlid(&od->port_tnlids, "port",
898 (1u << 15) - 1, &od->port_key_hint);
899 }
900
/* Returns a newly allocated string made of "cr-" followed by 'port_name'.
 * The string comes from xasprintf(); the caller is presumably responsible
 * for freeing it -- confirm against callers. */
static char *
chassis_redirect_name(const char *port_name)
{
    char *redirect_name = xasprintf("cr-%s", port_name);

    return redirect_name;
}
906
907 static bool
908 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
909 {
910 struct macam_node *macam_node;
911 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
912 &macam) {
913 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
914 if (warn) {
915 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
916 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
917 ETH_ADDR_ARGS(macam_node->mac_addr));
918 }
919 return true;
920 }
921 }
922 return false;
923 }
924
925 static void
926 ipam_insert_mac(struct eth_addr *ea, bool check)
927 {
928 if (!ea) {
929 return;
930 }
931
932 uint64_t mac64 = eth_addr_to_uint64(*ea);
933 /* If the new MAC was not assigned by this address management system or
934 * check is true and the new MAC is a duplicate, do not insert it into the
935 * macam hmap. */
936 if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
937 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
938 return;
939 }
940
941 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
942 new_macam_node->mac_addr = *ea;
943 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
944 }
945
946 static void
947 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
948 {
949 if (!od || !od->ipam_info.allocated_ipv4s) {
950 return;
951 }
952
953 if (ip >= od->ipam_info.start_ipv4 &&
954 ip < (od->ipam_info.start_ipv4 + od->ipam_info.total_ipv4s)) {
955 if (bitmap_is_set(od->ipam_info.allocated_ipv4s,
956 ip - od->ipam_info.start_ipv4)) {
957 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
958 VLOG_WARN_RL(&rl, "Duplicate IP set on switch %s: "IP_FMT,
959 od->nbs->name, IP_ARGS(htonl(ip)));
960 }
961 bitmap_set1(od->ipam_info.allocated_ipv4s,
962 ip - od->ipam_info.start_ipv4);
963 }
964 }
965
966 static void
967 ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
968 char *address)
969 {
970 if (!od || !op || !address || !strcmp(address, "unknown")
971 || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
972 return;
973 }
974
975 struct lport_addresses laddrs;
976 if (!extract_lsp_addresses(address, &laddrs)) {
977 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
978 VLOG_WARN_RL(&rl, "Extract addresses failed.");
979 return;
980 }
981 ipam_insert_mac(&laddrs.ea, true);
982
983 /* IP is only added to IPAM if the switch's subnet option
984 * is set, whereas MAC is always added to MACAM. */
985 if (!od->ipam_info.allocated_ipv4s) {
986 destroy_lport_addresses(&laddrs);
987 return;
988 }
989
990 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
991 uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
992 ipam_insert_ip(od, ip);
993 }
994
995 destroy_lport_addresses(&laddrs);
996 }
997
998 static void
999 ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
1000 {
1001 if (!od || !op) {
1002 return;
1003 }
1004
1005 if (op->nbsp) {
1006 /* Add all the port's addresses to address data structures. */
1007 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
1008 ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
1009 }
1010 } else if (op->nbrp) {
1011 struct lport_addresses lrp_networks;
1012 if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
1013 static struct vlog_rate_limit rl
1014 = VLOG_RATE_LIMIT_INIT(1, 1);
1015 VLOG_WARN_RL(&rl, "Extract addresses failed.");
1016 return;
1017 }
1018 ipam_insert_mac(&lrp_networks.ea, true);
1019
1020 if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
1021 || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
1022 destroy_lport_addresses(&lrp_networks);
1023 return;
1024 }
1025
1026 for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
1027 uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
1028 ipam_insert_ip(op->peer->od, ip);
1029 }
1030
1031 destroy_lport_addresses(&lrp_networks);
1032 }
1033 }
1034
1035 static uint64_t
1036 ipam_get_unused_mac(void)
1037 {
1038 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1039 static uint32_t last_mac;
1040
1041 uint64_t mac64;
1042 struct eth_addr mac;
1043 uint32_t mac_addr_suffix, i;
1044 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1045 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1046 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
1047 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1048 eth_addr_from_uint64(mac64, &mac);
1049 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
1050 last_mac = mac_addr_suffix;
1051 break;
1052 }
1053 }
1054
1055 if (i == MAC_ADDR_SPACE) {
1056 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1057 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1058 mac64 = 0;
1059 }
1060
1061 return mac64;
1062 }
1063
1064 static uint32_t
1065 ipam_get_unused_ip(struct ovn_datapath *od)
1066 {
1067 if (!od || !od->ipam_info.allocated_ipv4s) {
1068 return 0;
1069 }
1070
1071 size_t new_ip_index = bitmap_scan(od->ipam_info.allocated_ipv4s, 0, 0,
1072 od->ipam_info.total_ipv4s - 1);
1073 if (new_ip_index == od->ipam_info.total_ipv4s - 1) {
1074 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1075 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
1076 return 0;
1077 }
1078
1079 return od->ipam_info.start_ipv4 + new_ip_index;
1080 }
1081
/* What has to happen to one component (MAC, IPv4 or IPv6) of a logical
 * switch port's dynamic addresses. */
enum dynamic_update_type {
    NONE,    /* No change to the address */
    REMOVE,  /* Address is no longer dynamic */
    STATIC,  /* Use static address (MAC only) */
    DYNAMIC, /* Assign a new dynamic address */
};
1088
/* A pending decision about the dynamic addresses of one logical switch
 * port: which components must be kept, dropped or (re)assigned. */
struct dynamic_address_update {
    struct ovs_list node;       /* In build_ipam()'s list of updates. */

    struct ovn_port *op;        /* Port whose dynamic addresses are updated. */

    /* Addresses parsed from the port's existing 'dynamic_addresses', if it
     * has any. */
    struct lport_addresses current_addresses;
    struct eth_addr static_mac; /* MAC to use when 'mac' is STATIC. */
    enum dynamic_update_type mac;   /* Action for the MAC address. */
    enum dynamic_update_type ipv4;  /* Action for the IPv4 address. */
    enum dynamic_update_type ipv6;  /* Action for the IPv6 address. */
};
1100
1101 static enum dynamic_update_type
1102 dynamic_mac_changed(const char *lsp_addresses,
1103 struct dynamic_address_update *update)
1104 {
1105 struct eth_addr ea;
1106
1107 if (ovs_scan(lsp_addresses, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1108 if (eth_addr_equals(ea, update->current_addresses.ea)) {
1109 return NONE;
1110 } else {
1111 /* MAC is still static, but it has changed */
1112 update->static_mac = ea;
1113 return STATIC;
1114 }
1115 }
1116
1117 uint64_t mac64 = eth_addr_to_uint64(update->current_addresses.ea);
1118 if ((mac64 ^ MAC_ADDR_PREFIX) >> 24) {
1119 return DYNAMIC;
1120 } else {
1121 return NONE;
1122 }
1123 }
1124
1125 static enum dynamic_update_type
1126 dynamic_ip4_changed(struct dynamic_address_update *update)
1127 {
1128 const struct ipam_info *ipam = &update->op->od->ipam_info;
1129 const struct lport_addresses *cur_addresses = &update->current_addresses;
1130 bool dynamic_ip4 = ipam->allocated_ipv4s != NULL;
1131
1132 if (!dynamic_ip4) {
1133 if (update->current_addresses.n_ipv4_addrs) {
1134 return REMOVE;
1135 } else {
1136 return NONE;
1137 }
1138 }
1139
1140 if (!cur_addresses->n_ipv4_addrs) {
1141 /* IPv4 was previously static but now is dynamic */
1142 return DYNAMIC;
1143 }
1144
1145 uint32_t ip4 = ntohl(cur_addresses->ipv4_addrs[0].addr);
1146 if (ip4 < ipam->start_ipv4) {
1147 return DYNAMIC;
1148 }
1149
1150 uint32_t index = ip4 - ipam->start_ipv4;
1151 if (index > ipam->total_ipv4s ||
1152 bitmap_is_set(ipam->allocated_ipv4s, index)) {
1153 /* Previously assigned dynamic IPv4 address can no longer be used.
1154 * It's either outside the subnet, conflicts with an excluded IP,
1155 * or conflicts with a statically-assigned address on the switch
1156 */
1157 return DYNAMIC;
1158 } else {
1159 return NONE;
1160 }
1161 }
1162
1163 static enum dynamic_update_type
1164 dynamic_ip6_changed(struct dynamic_address_update *update)
1165 {
1166 bool dynamic_ip6 = update->op->od->ipam_info.ipv6_prefix_set;
1167
1168 if (!dynamic_ip6) {
1169 if (update->current_addresses.n_ipv6_addrs) {
1170 /* IPv6 was dynamic but now is not */
1171 return REMOVE;
1172 } else {
1173 /* IPv6 has never been dynamic */
1174 return NONE;
1175 }
1176 }
1177
1178 if (update->mac != NONE) {
1179 /* IPv6 address is based on MAC, so if MAC has been updated,
1180 * then we have to update IPv6 address too.
1181 */
1182 return DYNAMIC;
1183 }
1184
1185 if (!update->current_addresses.n_ipv6_addrs) {
1186 /* IPv6 was previously static but now is dynamic */
1187 return DYNAMIC;
1188 }
1189
1190 struct in6_addr masked = ipv6_addr_bitand(
1191 &update->current_addresses.ipv6_addrs[0].addr,
1192 &update->op->od->ipam_info.ipv6_prefix);
1193 if (!IN6_ARE_ADDR_EQUAL(&masked, &update->op->od->ipam_info.ipv6_prefix)) {
1194 return DYNAMIC;
1195 }
1196
1197 return NONE;
1198 }
1199
1200 /* Check previously assigned dynamic addresses for validity. This will
1201 * check if the assigned addresses need to change.
1202 *
1203 * Returns true if any changes to dynamic addresses are required
1204 */
1205 static bool
1206 dynamic_addresses_check_for_updates(const char *lsp_addrs,
1207 struct dynamic_address_update *update)
1208 {
1209 update->mac = dynamic_mac_changed(lsp_addrs, update);
1210 update->ipv4 = dynamic_ip4_changed(update);
1211 update->ipv6 = dynamic_ip6_changed(update);
1212 if (update->mac == NONE &&
1213 update->ipv4 == NONE &&
1214 update->ipv6 == NONE) {
1215 return false;
1216 } else {
1217 return true;
1218 }
1219 }
1220
1221 /* For addresses that do not need to be updated, go ahead and insert them
1222 * into IPAM. This way, their addresses will be claimed and cannot be assigned
1223 * elsewhere later.
1224 */
1225 static void
1226 update_unchanged_dynamic_addresses(struct dynamic_address_update *update)
1227 {
1228 if (update->mac == NONE) {
1229 ipam_insert_mac(&update->current_addresses.ea, false);
1230 }
1231 if (update->ipv4 == NONE && update->current_addresses.n_ipv4_addrs) {
1232 ipam_insert_ip(update->op->od,
1233 ntohl(update->current_addresses.ipv4_addrs[0].addr));
1234 }
1235 }
1236
1237 static void
1238 set_lsp_dynamic_addresses(const char *dynamic_addresses, struct ovn_port *op)
1239 {
1240 extract_lsp_addresses(dynamic_addresses, &op->lsp_addrs[op->n_lsp_addrs]);
1241 op->n_lsp_addrs++;
1242 }
1243
1244 /* Determines which components (MAC, IPv4, and IPv6) of dynamic
1245 * addresses need to be assigned. This is used exclusively for
1246 * ports that do not have dynamic addresses already assigned.
1247 */
1248 static void
1249 set_dynamic_updates(const char *addrspec,
1250 struct dynamic_address_update *update)
1251 {
1252 struct eth_addr mac;
1253 int n = 0;
1254 if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
1255 ETH_ADDR_SCAN_ARGS(mac), &n)
1256 && addrspec[n] == '\0') {
1257 update->mac = STATIC;
1258 update->static_mac = mac;
1259 } else {
1260 update->mac = DYNAMIC;
1261 }
1262 if (update->op->od->ipam_info.allocated_ipv4s) {
1263 update->ipv4 = DYNAMIC;
1264 } else {
1265 update->ipv4 = NONE;
1266 }
1267 if (update->op->od->ipam_info.ipv6_prefix_set) {
1268 update->ipv6 = DYNAMIC;
1269 } else {
1270 update->ipv6 = NONE;
1271 }
1272 }
1273
1274 static void
1275 update_dynamic_addresses(struct ovn_datapath *od,
1276 struct dynamic_address_update *update)
1277 {
1278 struct eth_addr mac;
1279 switch (update->mac) {
1280 case NONE:
1281 mac = update->current_addresses.ea;
1282 break;
1283 case REMOVE:
1284 OVS_NOT_REACHED();
1285 case STATIC:
1286 mac = update->static_mac;
1287 break;
1288 case DYNAMIC:
1289 eth_addr_from_uint64(ipam_get_unused_mac(), &mac);
1290 break;
1291 }
1292
1293 ovs_be32 ip4 = 0;
1294 switch (update->ipv4) {
1295 case NONE:
1296 if (update->current_addresses.n_ipv4_addrs) {
1297 ip4 = update->current_addresses.ipv4_addrs[0].addr;
1298 }
1299 break;
1300 case REMOVE:
1301 break;
1302 case STATIC:
1303 OVS_NOT_REACHED();
1304 case DYNAMIC:
1305 ip4 = htonl(ipam_get_unused_ip(od));
1306 }
1307
1308 struct in6_addr ip6 = in6addr_any;
1309 switch (update->ipv6) {
1310 case NONE:
1311 if (update->current_addresses.n_ipv6_addrs) {
1312 ip6 = update->current_addresses.ipv6_addrs[0].addr;
1313 }
1314 break;
1315 case REMOVE:
1316 break;
1317 case STATIC:
1318 OVS_NOT_REACHED();
1319 case DYNAMIC:
1320 in6_generate_eui64(mac, &od->ipam_info.ipv6_prefix, &ip6);
1321 break;
1322 }
1323
1324 struct ds new_addr = DS_EMPTY_INITIALIZER;
1325 ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1326 if (ip4) {
1327 ipam_insert_ip(od, ntohl(ip4));
1328 ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(ip4));
1329 }
1330 if (!IN6_ARE_ADDR_EQUAL(&ip6, &in6addr_any)) {
1331 char ip6_s[INET6_ADDRSTRLEN + 1];
1332 ipv6_string_mapped(ip6_s, &ip6);
1333 ds_put_format(&new_addr, " %s", ip6_s);
1334 }
1335 nbrec_logical_switch_port_set_dynamic_addresses(update->op->nbsp,
1336 ds_cstr(&new_addr));
1337 set_lsp_dynamic_addresses(ds_cstr(&new_addr), update->op);
1338 ds_destroy(&new_addr);
1339 }
1340
1341 static void
1342 build_ipam(struct hmap *datapaths, struct hmap *ports)
1343 {
1344 /* IPAM generally stands for IP address management. In non-virtualized
1345 * world, MAC addresses come with the hardware. But, with virtualized
1346 * workloads, they need to be assigned and managed. This function
1347 * does both IP address management (ipam) and MAC address management
1348 * (macam). */
1349
1350 /* If the switch's other_config:subnet is set, allocate new addresses for
1351 * ports that have the "dynamic" keyword in their addresses column. */
1352 struct ovn_datapath *od;
1353 HMAP_FOR_EACH (od, key_node, datapaths) {
1354 if (!od->nbs) {
1355 continue;
1356 }
1357
1358 struct ovs_list updates;
1359 ovs_list_init(&updates);
1360 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1361 const struct nbrec_logical_switch_port *nbsp = od->nbs->ports[i];
1362
1363 if (!od->ipam_info.allocated_ipv4s &&
1364 !od->ipam_info.ipv6_prefix_set) {
1365 if (nbsp->dynamic_addresses) {
1366 nbrec_logical_switch_port_set_dynamic_addresses(nbsp,
1367 NULL);
1368 }
1369 continue;
1370 }
1371
1372 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1373 if (!op || op->nbsp != nbsp || op->peer) {
1374 /* Do not allocate addresses for logical switch ports that
1375 * have a peer. */
1376 continue;
1377 }
1378
1379 int num_dynamic_addresses = 0;
1380 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1381 if (!is_dynamic_lsp_address(nbsp->addresses[j])) {
1382 continue;
1383 }
1384 if (num_dynamic_addresses) {
1385 static struct vlog_rate_limit rl
1386 = VLOG_RATE_LIMIT_INIT(1, 1);
1387 VLOG_WARN_RL(&rl, "More than one dynamic address "
1388 "configured for logical switch port '%s'",
1389 nbsp->name);
1390 continue;
1391 }
1392 num_dynamic_addresses++;
1393 struct dynamic_address_update *update
1394 = xzalloc(sizeof *update);
1395 update->op = op;
1396 if (nbsp->dynamic_addresses) {
1397 bool any_changed;
1398 extract_lsp_addresses(nbsp->dynamic_addresses,
1399 &update->current_addresses);
1400 any_changed = dynamic_addresses_check_for_updates(
1401 nbsp->addresses[j], update);
1402 update_unchanged_dynamic_addresses(update);
1403 if (any_changed) {
1404 ovs_list_push_back(&updates, &update->node);
1405 } else {
1406 /* No changes to dynamic addresses */
1407 set_lsp_dynamic_addresses(nbsp->dynamic_addresses, op);
1408 destroy_lport_addresses(&update->current_addresses);
1409 free(update);
1410 }
1411 } else {
1412 set_dynamic_updates(nbsp->addresses[j], update);
1413 ovs_list_push_back(&updates, &update->node);
1414 }
1415 }
1416
1417 if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
1418 nbrec_logical_switch_port_set_dynamic_addresses(nbsp, NULL);
1419 }
1420 }
1421
1422 /* After retaining all unchanged dynamic addresses, now assign
1423 * new ones.
1424 */
1425 struct dynamic_address_update *update;
1426 LIST_FOR_EACH_POP (update, node, &updates) {
1427 update_dynamic_addresses(od, update);
1428 destroy_lport_addresses(&update->current_addresses);
1429 free(update);
1430 }
1431 }
1432 }
1433 \f
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags.
 *
 * One node exists per distinct parent name; nodes are created on demand by
 * tag_alloc_get_node() and released by tag_alloc_destroy(). */
struct tag_alloc_node {
    struct hmap_node hmap_node;     /* In the tag allocation table, hashed on
                                     * 'parent_name'. */
    char *parent_name;              /* Owned copy of the parent port name. */
    unsigned long *allocated_tags;  /* A bitmap to track allocated tags. */
};
1443
1444 static void
1445 tag_alloc_destroy(struct hmap *tag_alloc_table)
1446 {
1447 struct tag_alloc_node *node;
1448 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1449 bitmap_free(node->allocated_tags);
1450 free(node->parent_name);
1451 free(node);
1452 }
1453 hmap_destroy(tag_alloc_table);
1454 }
1455
1456 static struct tag_alloc_node *
1457 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1458 {
1459 /* If a node for the 'parent_name' exists, return it. */
1460 struct tag_alloc_node *tag_alloc_node;
1461 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1462 hash_string(parent_name, 0),
1463 tag_alloc_table) {
1464 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1465 return tag_alloc_node;
1466 }
1467 }
1468
1469 /* Create a new node. */
1470 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1471 tag_alloc_node->parent_name = xstrdup(parent_name);
1472 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1473 /* Tag 0 is invalid for nested containers. */
1474 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1475 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1476 hash_string(parent_name, 0));
1477
1478 return tag_alloc_node;
1479 }
1480
1481 static void
1482 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1483 const struct nbrec_logical_switch_port *nbsp)
1484 {
1485 /* Add the tags of already existing nested containers. If there is no
1486 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1487 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1488 return;
1489 }
1490
1491 struct tag_alloc_node *tag_alloc_node;
1492 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1493 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1494 }
1495
1496 static void
1497 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1498 const struct nbrec_logical_switch_port *nbsp)
1499 {
1500 if (!nbsp->tag_request) {
1501 return;
1502 }
1503
1504 if (nbsp->parent_name && nbsp->parent_name[0]
1505 && *nbsp->tag_request == 0) {
1506 /* For nested containers that need allocation, do the allocation. */
1507
1508 if (nbsp->tag) {
1509 /* This has already been allocated. */
1510 return;
1511 }
1512
1513 struct tag_alloc_node *tag_alloc_node;
1514 int64_t tag;
1515 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1516 nbsp->parent_name);
1517 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1518 if (tag == MAX_OVN_TAGS) {
1519 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1520 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1521 "parent %s", nbsp->parent_name);
1522 return;
1523 }
1524 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1525 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1526 } else if (*nbsp->tag_request != 0) {
1527 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1528 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1529 }
1530 }
1531 \f
1532
1533 static void
1534 join_logical_ports(struct northd_context *ctx,
1535 struct hmap *datapaths, struct hmap *ports,
1536 struct hmap *chassis_qdisc_queues,
1537 struct hmap *tag_alloc_table, struct ovs_list *sb_only,
1538 struct ovs_list *nb_only, struct ovs_list *both)
1539 {
1540 hmap_init(ports);
1541 ovs_list_init(sb_only);
1542 ovs_list_init(nb_only);
1543 ovs_list_init(both);
1544
1545 const struct sbrec_port_binding *sb;
1546 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
1547 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
1548 NULL, NULL, sb);
1549 ovs_list_push_back(sb_only, &op->list);
1550 }
1551
1552 struct ovn_datapath *od;
1553 HMAP_FOR_EACH (od, key_node, datapaths) {
1554 if (od->nbs) {
1555 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1556 const struct nbrec_logical_switch_port *nbsp
1557 = od->nbs->ports[i];
1558 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1559 if (op) {
1560 if (op->nbsp || op->nbrp) {
1561 static struct vlog_rate_limit rl
1562 = VLOG_RATE_LIMIT_INIT(5, 1);
1563 VLOG_WARN_RL(&rl, "duplicate logical port %s",
1564 nbsp->name);
1565 continue;
1566 }
1567 op->nbsp = nbsp;
1568 ovs_list_remove(&op->list);
1569
1570 uint32_t queue_id = smap_get_int(&op->sb->options,
1571 "qdisc_queue_id", 0);
1572 if (queue_id && op->sb->chassis) {
1573 add_chassis_queue(
1574 chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
1575 queue_id);
1576 }
1577
1578 ovs_list_push_back(both, &op->list);
1579
1580 /* This port exists due to a SB binding, but should
1581 * not have been initialized fully. */
1582 ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
1583 } else {
1584 op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
1585 ovs_list_push_back(nb_only, &op->list);
1586 }
1587
1588 if (!strcmp(nbsp->type, "localnet")) {
1589 od->localnet_port = op;
1590 }
1591
1592 op->lsp_addrs
1593 = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
1594 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1595 if (!strcmp(nbsp->addresses[j], "unknown")
1596 || !strcmp(nbsp->addresses[j], "router")) {
1597 continue;
1598 }
1599 if (is_dynamic_lsp_address(nbsp->addresses[j])) {
1600 continue;
1601 } else if (!extract_lsp_addresses(nbsp->addresses[j],
1602 &op->lsp_addrs[op->n_lsp_addrs])) {
1603 static struct vlog_rate_limit rl
1604 = VLOG_RATE_LIMIT_INIT(1, 1);
1605 VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
1606 "switch port addresses. No MAC "
1607 "address found",
1608 op->nbsp->addresses[j]);
1609 continue;
1610 }
1611 op->n_lsp_addrs++;
1612 }
1613
1614 op->ps_addrs
1615 = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
1616 for (size_t j = 0; j < nbsp->n_port_security; j++) {
1617 if (!extract_lsp_addresses(nbsp->port_security[j],
1618 &op->ps_addrs[op->n_ps_addrs])) {
1619 static struct vlog_rate_limit rl
1620 = VLOG_RATE_LIMIT_INIT(1, 1);
1621 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
1622 "security. No MAC address found",
1623 op->nbsp->port_security[j]);
1624 continue;
1625 }
1626 op->n_ps_addrs++;
1627 }
1628
1629 op->od = od;
1630 ipam_add_port_addresses(od, op);
1631 tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
1632 }
1633 } else {
1634 for (size_t i = 0; i < od->nbr->n_ports; i++) {
1635 const struct nbrec_logical_router_port *nbrp
1636 = od->nbr->ports[i];
1637
1638 struct lport_addresses lrp_networks;
1639 if (!extract_lrp_networks(nbrp, &lrp_networks)) {
1640 static struct vlog_rate_limit rl
1641 = VLOG_RATE_LIMIT_INIT(5, 1);
1642 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
1643 continue;
1644 }
1645
1646 if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
1647 continue;
1648 }
1649
1650 struct ovn_port *op = ovn_port_find(ports, nbrp->name);
1651 if (op) {
1652 if (op->nbsp || op->nbrp) {
1653 static struct vlog_rate_limit rl
1654 = VLOG_RATE_LIMIT_INIT(5, 1);
1655 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
1656 nbrp->name);
1657 continue;
1658 }
1659 op->nbrp = nbrp;
1660 ovs_list_remove(&op->list);
1661 ovs_list_push_back(both, &op->list);
1662
1663 /* This port exists but should not have been
1664 * initialized fully. */
1665 ovs_assert(!op->lrp_networks.n_ipv4_addrs
1666 && !op->lrp_networks.n_ipv6_addrs);
1667 } else {
1668 op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
1669 ovs_list_push_back(nb_only, &op->list);
1670 }
1671
1672 op->lrp_networks = lrp_networks;
1673 op->od = od;
1674 ipam_add_port_addresses(op->od, op);
1675
1676 const char *redirect_chassis = smap_get(&op->nbrp->options,
1677 "redirect-chassis");
1678 if (redirect_chassis || op->nbrp->n_gateway_chassis) {
1679 /* Additional "derived" ovn_port crp represents the
1680 * instance of op on the "redirect-chassis". */
1681 const char *gw_chassis = smap_get(&op->od->nbr->options,
1682 "chassis");
1683 if (gw_chassis) {
1684 static struct vlog_rate_limit rl
1685 = VLOG_RATE_LIMIT_INIT(1, 1);
1686 VLOG_WARN_RL(&rl, "Bad configuration: "
1687 "redirect-chassis configured on port %s "
1688 "on L3 gateway router", nbrp->name);
1689 continue;
1690 }
1691 if (od->l3dgw_port || od->l3redirect_port) {
1692 static struct vlog_rate_limit rl
1693 = VLOG_RATE_LIMIT_INIT(1, 1);
1694 VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
1695 "with redirect-chassis on same logical "
1696 "router %s", od->nbr->name);
1697 continue;
1698 }
1699
1700 char *redirect_name = chassis_redirect_name(nbrp->name);
1701 struct ovn_port *crp = ovn_port_find(ports, redirect_name);
1702 if (crp) {
1703 crp->derived = true;
1704 crp->nbrp = nbrp;
1705 ovs_list_remove(&crp->list);
1706 ovs_list_push_back(both, &crp->list);
1707 } else {
1708 crp = ovn_port_create(ports, redirect_name,
1709 NULL, nbrp, NULL);
1710 crp->derived = true;
1711 ovs_list_push_back(nb_only, &crp->list);
1712 }
1713 crp->od = od;
1714 free(redirect_name);
1715
1716 /* Set l3dgw_port and l3redirect_port in od, for later
1717 * use during flow creation. */
1718 od->l3dgw_port = op;
1719 od->l3redirect_port = crp;
1720 }
1721 }
1722 }
1723 }
1724
1725 /* Connect logical router ports, and logical switch ports of type "router",
1726 * to their peers. */
1727 struct ovn_port *op;
1728 HMAP_FOR_EACH (op, key_node, ports) {
1729 if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
1730 const char *peer_name = smap_get(&op->nbsp->options, "router-port");
1731 if (!peer_name) {
1732 continue;
1733 }
1734
1735 struct ovn_port *peer = ovn_port_find(ports, peer_name);
1736 if (!peer || !peer->nbrp) {
1737 continue;
1738 }
1739
1740 peer->peer = op;
1741 op->peer = peer;
1742 op->od->router_ports = xrealloc(
1743 op->od->router_ports,
1744 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
1745 op->od->router_ports[op->od->n_router_ports++] = op;
1746
1747 /* Fill op->lsp_addrs for op->nbsp->addresses[] with
1748 * contents "router", which was skipped in the loop above. */
1749 for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
1750 if (!strcmp(op->nbsp->addresses[j], "router")) {
1751 if (extract_lrp_networks(peer->nbrp,
1752 &op->lsp_addrs[op->n_lsp_addrs])) {
1753 op->n_lsp_addrs++;
1754 }
1755 break;
1756 }
1757 }
1758 } else if (op->nbrp && op->nbrp->peer && !op->derived) {
1759 struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
1760 if (peer) {
1761 if (peer->nbrp) {
1762 op->peer = peer;
1763 } else if (peer->nbsp) {
1764 /* An ovn_port for a switch port of type "router" does have
1765 * a router port as its peer (see the case above for
1766 * "router" ports), but this is set via options:router-port
1767 * in Logical_Switch_Port and does not involve the
1768 * Logical_Router_Port's 'peer' column. */
1769 static struct vlog_rate_limit rl =
1770 VLOG_RATE_LIMIT_INIT(5, 1);
1771 VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
1772 "port %s is a switch port", op->key);
1773 }
1774 }
1775 }
1776 }
1777 }
1778
1779 static void
1780 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1781 uint16_t *port, int *addr_family);
1782
1783 static void
1784 get_router_load_balancer_ips(const struct ovn_datapath *od,
1785 struct sset *all_ips, int *addr_family)
1786 {
1787 if (!od->nbr) {
1788 return;
1789 }
1790
1791 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1792 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1793 struct smap *vips = &lb->vips;
1794 struct smap_node *node;
1795
1796 SMAP_FOR_EACH (node, vips) {
1797 /* node->key contains IP:port or just IP. */
1798 char *ip_address = NULL;
1799 uint16_t port;
1800
1801 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
1802 addr_family);
1803 if (!ip_address) {
1804 continue;
1805 }
1806
1807 if (!sset_contains(all_ips, ip_address)) {
1808 sset_add(all_ips, ip_address);
1809 }
1810
1811 free(ip_address);
1812 }
1813 }
1814 }
1815
1816 /* Returns an array of strings, each consisting of a MAC address followed
1817 * by one or more IP addresses, and if the port is a distributed gateway
1818 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1819 * LPORT_NAME is the name of the L3 redirect port or the name of the
1820 * logical_port specified in a NAT rule. These strings include the
1821 * external IP addresses of all NAT rules defined on that router, and all
1822 * of the IP addresses used in load balancer VIPs defined on that router.
1823 *
1824 * The caller must free each of the n returned strings with free(),
1825 * and must free the returned array when it is no longer needed. */
1826 static char **
1827 get_nat_addresses(const struct ovn_port *op, size_t *n)
1828 {
1829 size_t n_nats = 0;
1830 struct eth_addr mac;
1831 if (!op->nbrp || !op->od || !op->od->nbr
1832 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1833 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
1834 *n = n_nats;
1835 return NULL;
1836 }
1837
1838 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1839 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1840 bool central_ip_address = false;
1841
1842 char **addresses;
1843 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
1844
1845 /* Get NAT IP addresses. */
1846 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
1847 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1848 ovs_be32 ip, mask;
1849
1850 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1851 if (error || mask != OVS_BE32_MAX) {
1852 free(error);
1853 continue;
1854 }
1855
1856 /* Determine whether this NAT rule satisfies the conditions for
1857 * distributed NAT processing. */
1858 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1859 && nat->logical_port && nat->external_mac) {
1860 /* Distributed NAT rule. */
1861 if (eth_addr_from_string(nat->external_mac, &mac)) {
1862 struct ds address = DS_EMPTY_INITIALIZER;
1863 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1864 ds_put_format(&address, " %s", nat->external_ip);
1865 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1866 nat->logical_port);
1867 addresses[n_nats++] = ds_steal_cstr(&address);
1868 }
1869 } else {
1870 /* Centralized NAT rule, either on gateway router or distributed
1871 * router. */
1872 ds_put_format(&c_addresses, " %s", nat->external_ip);
1873 central_ip_address = true;
1874 }
1875 }
1876
1877 /* A set to hold all load-balancer vips. */
1878 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1879 int addr_family;
1880 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
1881
1882 const char *ip_address;
1883 SSET_FOR_EACH (ip_address, &all_ips) {
1884 ds_put_format(&c_addresses, " %s", ip_address);
1885 central_ip_address = true;
1886 }
1887 sset_destroy(&all_ips);
1888
1889 if (central_ip_address) {
1890 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1891 * ports should be restricted to the "redirect-chassis". */
1892 if (op->od->l3redirect_port) {
1893 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1894 op->od->l3redirect_port->json_key);
1895 }
1896
1897 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
1898 }
1899
1900 *n = n_nats;
1901
1902 return addresses;
1903 }
1904
1905 static bool
1906 gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc,
1907 const struct sbrec_chassis *nb_gwc_c,
1908 const struct sbrec_gateway_chassis *sb_gwc)
1909 {
1910 bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
1911 && nb_gwc->priority == sb_gwc->priority
1912 && smap_equal(&nb_gwc->options, &sb_gwc->options)
1913 && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
1914
1915 if (!equal) {
1916 return false;
1917 }
1918
1919 /* If everything else matched and we were unable to find the SBDB
1920 * Chassis entry at this time, assume a match and return true.
1921 * This happens when an ovn-controller is restarting and the Chassis
1922 * entry is gone away momentarily */
1923 return !nb_gwc_c
1924 || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
1925 sb_gwc->chassis->name));
1926 }
1927
1928 static bool
1929 sbpb_gw_chassis_needs_update(
1930 struct ovsdb_idl_index *sbrec_chassis_by_name,
1931 const struct sbrec_port_binding *port_binding,
1932 const struct nbrec_logical_router_port *lrp)
1933 {
1934 if (!lrp || !port_binding) {
1935 return false;
1936 }
1937
1938 /* These arrays are used to collect valid Gateway_Chassis and valid
1939 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1940 * we ignore the ones we can't match on the SBDB */
1941 struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis *
1942 sizeof *lrp_gwc);
1943 const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis *
1944 sizeof *lrp_gwc_c);
1945
1946 /* Count the number of gateway chassis chassis names from the logical
1947 * router port that we are able to match on the southbound database */
1948 int lrp_n_gateway_chassis = 0;
1949 int n;
1950 for (n = 0; n < lrp->n_gateway_chassis; n++) {
1951
1952 if (!lrp->gateway_chassis[n]->chassis_name) {
1953 continue;
1954 }
1955
1956 const struct sbrec_chassis *chassis =
1957 chassis_lookup_by_name(sbrec_chassis_by_name,
1958 lrp->gateway_chassis[n]->chassis_name);
1959
1960 lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
1961 lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
1962 lrp_n_gateway_chassis++;
1963 if (!chassis) {
1964 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1965 VLOG_WARN_RL(
1966 &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1967 "on logical router port %s does not exist in SBDB",
1968 lrp->gateway_chassis[n]->chassis_name, lrp->name);
1969 }
1970 }
1971
1972 /* Basic check, different amount of Gateway_Chassis means that we
1973 * need to update southbound database Port_Binding */
1974 if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) {
1975 free(lrp_gwc_c);
1976 free(lrp_gwc);
1977 return true;
1978 }
1979
1980 for (n = 0; n < lrp_n_gateway_chassis; n++) {
1981 int i;
1982 /* For each of the valid gw chassis on the lrp, check if there's
1983 * a match on the Port_Binding list, we assume order is not
1984 * persisted */
1985 for (i = 0; i < port_binding->n_gateway_chassis; i++) {
1986 if (gateway_chassis_equal(lrp_gwc[n],
1987 lrp_gwc_c[n],
1988 port_binding->gateway_chassis[i])) {
1989 break; /* we found a match */
1990 }
1991 }
1992
1993 /* if no Port_Binding gateway chassis matched for the entry... */
1994 if (i == port_binding->n_gateway_chassis) {
1995 free(lrp_gwc_c);
1996 free(lrp_gwc);
1997 return true; /* found no match for this gateway chassis on lrp */
1998 }
1999 }
2000
2001 /* no need for update, all ports matched */
2002 free(lrp_gwc_c);
2003 free(lrp_gwc);
2004 return false;
2005 }
2006
2007 /* This functions translates the gw chassis on the nb database
2008 * to sb database entries, the only difference is that SB database
2009 * Gateway_Chassis table references the chassis directly instead
2010 * of using the name */
2011 static void
2012 copy_gw_chassis_from_nbrp_to_sbpb(
2013 struct northd_context *ctx,
2014 struct ovsdb_idl_index *sbrec_chassis_by_name,
2015 const struct nbrec_logical_router_port *lrp,
2016 const struct sbrec_port_binding *port_binding) {
2017
2018 if (!lrp || !port_binding || !lrp->n_gateway_chassis) {
2019 return;
2020 }
2021
2022 struct sbrec_gateway_chassis **gw_chassis = NULL;
2023 int n_gwc = 0;
2024 int n;
2025
2026 /* XXX: This can be improved. This code will generate a set of new
2027 * Gateway_Chassis and push them all in a single transaction, instead
2028 * this would be more optimal if we just add/update/remove the rows in
2029 * the southbound db that need to change. We don't expect lots of
2030 * changes to the Gateway_Chassis table, but if that proves to be wrong
2031 * we should optimize this. */
2032 for (n = 0; n < lrp->n_gateway_chassis; n++) {
2033 struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n];
2034 if (!lrp_gwc->chassis_name) {
2035 continue;
2036 }
2037
2038 const struct sbrec_chassis *chassis =
2039 chassis_lookup_by_name(sbrec_chassis_by_name,
2040 lrp_gwc->chassis_name);
2041
2042 gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis);
2043
2044 struct sbrec_gateway_chassis *pb_gwc =
2045 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
2046
2047 sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name);
2048 sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority);
2049 sbrec_gateway_chassis_set_chassis(pb_gwc, chassis);
2050 sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options);
2051 sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids);
2052
2053 gw_chassis[n_gwc++] = pb_gwc;
2054 }
2055 sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc);
2056 free(gw_chassis);
2057 }
2058
2059 static void
2060 ovn_port_update_sbrec(struct northd_context *ctx,
2061 struct ovsdb_idl_index *sbrec_chassis_by_name,
2062 const struct ovn_port *op,
2063 struct hmap *chassis_qdisc_queues)
2064 {
2065 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
2066 if (op->nbrp) {
2067 /* If the router is for l3 gateway, it resides on a chassis
2068 * and its port type is "l3gateway". */
2069 const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");
2070 if (op->derived) {
2071 sbrec_port_binding_set_type(op->sb, "chassisredirect");
2072 } else if (chassis_name) {
2073 sbrec_port_binding_set_type(op->sb, "l3gateway");
2074 } else {
2075 sbrec_port_binding_set_type(op->sb, "patch");
2076 }
2077
2078 struct smap new;
2079 smap_init(&new);
2080 if (op->derived) {
2081 const char *redirect_chassis = smap_get(&op->nbrp->options,
2082 "redirect-chassis");
2083 if (op->nbrp->n_gateway_chassis && redirect_chassis) {
2084 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2085 VLOG_WARN_RL(
2086 &rl, "logical router port %s has both options:"
2087 "redirect-chassis and gateway_chassis populated "
2088 "redirect-chassis will be ignored in favour of "
2089 "gateway chassis", op->nbrp->name);
2090 }
2091
2092 if (op->nbrp->n_gateway_chassis) {
2093 if (sbpb_gw_chassis_needs_update(sbrec_chassis_by_name,
2094 op->sb, op->nbrp)) {
2095 copy_gw_chassis_from_nbrp_to_sbpb(ctx,
2096 sbrec_chassis_by_name,
2097 op->nbrp, op->sb);
2098 }
2099
2100 } else if (redirect_chassis) {
2101 /* Handle ports that had redirect-chassis option attached
2102 * to them, and for backwards compatibility convert them
2103 * to a single Gateway_Chassis entry */
2104 const struct sbrec_chassis *chassis =
2105 chassis_lookup_by_name(sbrec_chassis_by_name,
2106 redirect_chassis);
2107 if (chassis) {
2108 /* If we found the chassis, and the gw chassis on record
2109 * differs from what we expect go ahead and update */
2110 if (op->sb->n_gateway_chassis != 1
2111 || !op->sb->gateway_chassis[0]->chassis
2112 || strcmp(op->sb->gateway_chassis[0]->chassis->name,
2113 chassis->name)
2114 || op->sb->gateway_chassis[0]->priority != 0) {
2115 /* Construct a single Gateway_Chassis entry on the
2116 * Port_Binding attached to the redirect_chassis
2117 * name */
2118 struct sbrec_gateway_chassis *gw_chassis =
2119 sbrec_gateway_chassis_insert(ctx->ovnsb_txn);
2120
2121 char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
2122 chassis->name);
2123
2124 /* XXX: Again, here, we could just update an existing
2125 * Gateway_Chassis, instead of creating a new one
2126 * and replacing it */
2127 sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
2128 sbrec_gateway_chassis_set_priority(gw_chassis, 0);
2129 sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
2130 sbrec_gateway_chassis_set_external_ids(gw_chassis,
2131 &op->nbrp->external_ids);
2132 sbrec_port_binding_set_gateway_chassis(op->sb,
2133 &gw_chassis, 1);
2134 free(gwc_name);
2135 }
2136 } else {
2137 VLOG_WARN("chassis name '%s' from redirect from logical "
2138 " router port '%s' redirect-chassis not found",
2139 redirect_chassis, op->nbrp->name);
2140 if (op->sb->n_gateway_chassis) {
2141 sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
2142 0);
2143 }
2144 }
2145 }
2146 smap_add(&new, "distributed-port", op->nbrp->name);
2147 } else {
2148 if (op->peer) {
2149 smap_add(&new, "peer", op->peer->key);
2150 }
2151 if (chassis_name) {
2152 smap_add(&new, "l3gateway-chassis", chassis_name);
2153 }
2154 }
2155 sbrec_port_binding_set_options(op->sb, &new);
2156 smap_destroy(&new);
2157
2158 sbrec_port_binding_set_parent_port(op->sb, NULL);
2159 sbrec_port_binding_set_tag(op->sb, NULL, 0);
2160
2161 struct ds s = DS_EMPTY_INITIALIZER;
2162 ds_put_cstr(&s, op->nbrp->mac);
2163 for (int i = 0; i < op->nbrp->n_networks; ++i) {
2164 ds_put_format(&s, " %s", op->nbrp->networks[i]);
2165 }
2166 const char *addresses = ds_cstr(&s);
2167 sbrec_port_binding_set_mac(op->sb, &addresses, 1);
2168 ds_destroy(&s);
2169
2170 struct smap ids = SMAP_INITIALIZER(&ids);
2171 sbrec_port_binding_set_external_ids(op->sb, &ids);
2172 } else {
2173 if (strcmp(op->nbsp->type, "router")) {
2174 uint32_t queue_id = smap_get_int(
2175 &op->sb->options, "qdisc_queue_id", 0);
2176 bool has_qos = port_has_qos_params(&op->nbsp->options);
2177 struct smap options;
2178
2179 if (op->sb->chassis && has_qos && !queue_id) {
2180 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
2181 op->sb->chassis);
2182 } else if (!has_qos && queue_id) {
2183 free_chassis_queueid(chassis_qdisc_queues,
2184 op->sb->chassis,
2185 queue_id);
2186 queue_id = 0;
2187 }
2188
2189 smap_clone(&options, &op->nbsp->options);
2190 if (queue_id) {
2191 smap_add_format(&options,
2192 "qdisc_queue_id", "%d", queue_id);
2193 }
2194 sbrec_port_binding_set_options(op->sb, &options);
2195 smap_destroy(&options);
2196 if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
2197 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
2198 } else {
2199 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2200 VLOG_WARN_RL(
2201 &rl, "Unknown port type '%s' set on logical switch '%s'.",
2202 op->nbsp->type, op->nbsp->name);
2203 }
2204 } else {
2205 const char *chassis = NULL;
2206 if (op->peer && op->peer->od && op->peer->od->nbr) {
2207 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
2208 }
2209
2210 /* A switch port connected to a gateway router is also of
2211 * type "l3gateway". */
2212 if (chassis) {
2213 sbrec_port_binding_set_type(op->sb, "l3gateway");
2214 } else {
2215 sbrec_port_binding_set_type(op->sb, "patch");
2216 }
2217
2218 const char *router_port = smap_get(&op->nbsp->options,
2219 "router-port");
2220 if (router_port || chassis) {
2221 struct smap new;
2222 smap_init(&new);
2223 if (router_port) {
2224 smap_add(&new, "peer", router_port);
2225 }
2226 if (chassis) {
2227 smap_add(&new, "l3gateway-chassis", chassis);
2228 }
2229 sbrec_port_binding_set_options(op->sb, &new);
2230 smap_destroy(&new);
2231 }
2232
2233 const char *nat_addresses = smap_get(&op->nbsp->options,
2234 "nat-addresses");
2235 if (nat_addresses && !strcmp(nat_addresses, "router")) {
2236 if (op->peer && op->peer->od
2237 && (chassis || op->peer->od->l3redirect_port)) {
2238 size_t n_nats;
2239 char **nats = get_nat_addresses(op->peer, &n_nats);
2240 if (n_nats) {
2241 sbrec_port_binding_set_nat_addresses(op->sb,
2242 (const char **) nats, n_nats);
2243 for (size_t i = 0; i < n_nats; i++) {
2244 free(nats[i]);
2245 }
2246 free(nats);
2247 } else {
2248 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2249 }
2250 } else {
2251 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2252 }
2253 /* Only accept manual specification of ethernet address
2254 * followed by IPv4 addresses on type "l3gateway" ports. */
2255 } else if (nat_addresses && chassis) {
2256 struct lport_addresses laddrs;
2257 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
2258 static struct vlog_rate_limit rl =
2259 VLOG_RATE_LIMIT_INIT(1, 1);
2260 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
2261 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2262 } else {
2263 sbrec_port_binding_set_nat_addresses(op->sb,
2264 &nat_addresses, 1);
2265 destroy_lport_addresses(&laddrs);
2266 }
2267 } else {
2268 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
2269 }
2270 }
2271 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
2272 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
2273 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
2274 op->nbsp->n_addresses);
2275
2276 struct smap ids = SMAP_INITIALIZER(&ids);
2277 smap_clone(&ids, &op->nbsp->external_ids);
2278 const char *name = smap_get(&ids, "neutron:port_name");
2279 if (name && name[0]) {
2280 smap_add(&ids, "name", name);
2281 }
2282 sbrec_port_binding_set_external_ids(op->sb, &ids);
2283 smap_destroy(&ids);
2284 }
2285 }
2286
2287 /* Remove mac_binding entries that refer to logical_ports which are
2288 * deleted. */
2289 static void
2290 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
2291 {
2292 const struct sbrec_mac_binding *b, *n;
2293 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
2294 if (!ovn_port_find(ports, b->logical_port)) {
2295 sbrec_mac_binding_delete(b);
2296 }
2297 }
2298 }
2299
2300 /* Updates the southbound Port_Binding table so that it contains the logical
2301 * switch ports specified by the northbound database.
2302 *
2303 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2304 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2305 * datapaths. */
2306 static void
2307 build_ports(struct northd_context *ctx,
2308 struct ovsdb_idl_index *sbrec_chassis_by_name,
2309 struct hmap *datapaths, struct hmap *ports)
2310 {
2311 struct ovs_list sb_only, nb_only, both;
2312 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
2313 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
2314
2315 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
2316 &tag_alloc_table, &sb_only, &nb_only, &both);
2317
2318 struct ovn_port *op, *next;
2319 /* For logical ports that are in both databases, update the southbound
2320 * record based on northbound data. Also index the in-use tunnel_keys.
2321 * For logical ports that are in NB database, do any tag allocation
2322 * needed. */
2323 LIST_FOR_EACH_SAFE (op, next, list, &both) {
2324 if (op->nbsp) {
2325 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
2326 }
2327 ovn_port_update_sbrec(ctx, sbrec_chassis_by_name,
2328 op, &chassis_qdisc_queues);
2329
2330 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
2331 if (op->sb->tunnel_key > op->od->port_key_hint) {
2332 op->od->port_key_hint = op->sb->tunnel_key;
2333 }
2334 }
2335
2336 /* Add southbound record for each unmatched northbound record. */
2337 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
2338 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
2339 if (!tunnel_key) {
2340 continue;
2341 }
2342
2343 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
2344 ovn_port_update_sbrec(ctx, sbrec_chassis_by_name, op,
2345 &chassis_qdisc_queues);
2346
2347 sbrec_port_binding_set_logical_port(op->sb, op->key);
2348 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
2349 }
2350
2351 bool remove_mac_bindings = false;
2352 if (!ovs_list_is_empty(&sb_only)) {
2353 remove_mac_bindings = true;
2354 }
2355
2356 /* Delete southbound records without northbound matches. */
2357 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
2358 ovs_list_remove(&op->list);
2359 sbrec_port_binding_delete(op->sb);
2360 ovn_port_destroy(ports, op);
2361 }
2362 if (remove_mac_bindings) {
2363 cleanup_mac_bindings(ctx, ports);
2364 }
2365
2366 tag_alloc_destroy(&tag_alloc_table);
2367 destroy_chassis_queues(&chassis_qdisc_queues);
2368 }
2369 \f
/* Bounds of the range that a multicast group's 'key' falls in. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A multicast group, identified by its name and key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = {
    .name = MC_FLOOD,
    .key = 65535,
};

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = {
    .name = MC_UNKNOWN,
    .key = 65534,
};

/* Returns true if 'a' and 'b' have the same key and the same name, false
 * otherwise. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return strcmp(a->name, b->name) == 0;
}
2390
2391 /* Multicast group entry. */
2392 struct ovn_multicast {
2393 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
2394 struct ovn_datapath *datapath;
2395 const struct multicast_group *group;
2396
2397 struct ovn_port **ports;
2398 size_t n_ports, allocated_ports;
2399 };
2400
2401 static uint32_t
2402 ovn_multicast_hash(const struct ovn_datapath *datapath,
2403 const struct multicast_group *group)
2404 {
2405 return hash_pointer(datapath, group->key);
2406 }
2407
2408 static struct ovn_multicast *
2409 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
2410 const struct multicast_group *group)
2411 {
2412 struct ovn_multicast *mc;
2413
2414 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
2415 ovn_multicast_hash(datapath, group), mcgroups) {
2416 if (mc->datapath == datapath
2417 && multicast_group_equal(mc->group, group)) {
2418 return mc;
2419 }
2420 }
2421 return NULL;
2422 }
2423
2424 static void
2425 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
2426 struct ovn_port *port)
2427 {
2428 struct ovn_datapath *od = port->od;
2429 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
2430 if (!mc) {
2431 mc = xmalloc(sizeof *mc);
2432 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
2433 mc->datapath = od;
2434 mc->group = group;
2435 mc->n_ports = 0;
2436 mc->allocated_ports = 4;
2437 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
2438 }
2439 if (mc->n_ports >= mc->allocated_ports) {
2440 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
2441 sizeof *mc->ports);
2442 }
2443 mc->ports[mc->n_ports++] = port;
2444 }
2445
2446 static void
2447 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
2448 {
2449 if (mc) {
2450 hmap_remove(mcgroups, &mc->hmap_node);
2451 free(mc->ports);
2452 free(mc);
2453 }
2454 }
2455
2456 static void
2457 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
2458 const struct sbrec_multicast_group *sb)
2459 {
2460 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
2461 for (size_t i = 0; i < mc->n_ports; i++) {
2462 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2463 }
2464 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2465 free(ports);
2466 }
2467 \f
2468 /* Logical flow generation.
2469 *
2470 * This code generates the Logical_Flow table in the southbound database, as a
2471 * function of most of the northbound database.
2472 */
2473
2474 struct ovn_lflow {
2475 struct hmap_node hmap_node;
2476
2477 struct ovn_datapath *od;
2478 enum ovn_stage stage;
2479 uint16_t priority;
2480 char *match;
2481 char *actions;
2482 char *stage_hint;
2483 const char *where;
2484 };
2485
2486 static size_t
2487 ovn_lflow_hash(const struct ovn_lflow *lflow)
2488 {
2489 return ovn_logical_flow_hash(&lflow->od->sb->header_.uuid,
2490 ovn_stage_get_table(lflow->stage),
2491 ovn_stage_get_pipeline_name(lflow->stage),
2492 lflow->priority, lflow->match,
2493 lflow->actions);
2494 }
2495
2496 static bool
2497 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2498 {
2499 return (a->od == b->od
2500 && a->stage == b->stage
2501 && a->priority == b->priority
2502 && !strcmp(a->match, b->match)
2503 && !strcmp(a->actions, b->actions));
2504 }
2505
2506 static void
2507 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
2508 enum ovn_stage stage, uint16_t priority,
2509 char *match, char *actions, char *stage_hint,
2510 const char *where)
2511 {
2512 lflow->od = od;
2513 lflow->stage = stage;
2514 lflow->priority = priority;
2515 lflow->match = match;
2516 lflow->actions = actions;
2517 lflow->stage_hint = stage_hint;
2518 lflow->where = where;
2519 }
2520
2521 /* Adds a row with the specified contents to the Logical_Flow table. */
2522 static void
2523 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2524 enum ovn_stage stage, uint16_t priority,
2525 const char *match, const char *actions,
2526 const char *stage_hint, const char *where)
2527 {
2528 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2529
2530 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
2531 ovn_lflow_init(lflow, od, stage, priority,
2532 xstrdup(match), xstrdup(actions),
2533 nullable_xstrdup(stage_hint), where);
2534 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2535 }
2536
/* Adds a row with the specified contents, including 'STAGE_HINT', to the
 * Logical_Flow table, recording the invoking source location as the place
 * the flow was added. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH,     \
                                ACTIONS, STAGE_HINT)                         \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS,        \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

/* Same as ovn_lflow_add_with_hint(), with a null stage hint. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS)       \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS,        \
                     NULL, OVS_SOURCE_LOCATOR)
2546
2547 static struct ovn_lflow *
2548 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
2549 enum ovn_stage stage, uint16_t priority,
2550 const char *match, const char *actions, uint32_t hash)
2551 {
2552 struct ovn_lflow target;
2553 ovn_lflow_init(&target, od, stage, priority,
2554 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2555 NULL, NULL);
2556
2557 struct ovn_lflow *lflow;
2558 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, hash, lflows) {
2559 if (ovn_lflow_equal(lflow, &target)) {
2560 return lflow;
2561 }
2562 }
2563 return NULL;
2564 }
2565
2566 static void
2567 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2568 {
2569 if (lflow) {
2570 hmap_remove(lflows, &lflow->hmap_node);
2571 free(lflow->match);
2572 free(lflow->actions);
2573 free(lflow->stage_hint);
2574 free(lflow);
2575 }
2576 }
2577
2578 /* Appends port security constraints on L2 address field 'eth_addr_field'
2579 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2580 * elements, is the collection of port_security constraints from an
2581 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2582 static void
2583 build_port_security_l2(const char *eth_addr_field,
2584 struct lport_addresses *ps_addrs,
2585 unsigned int n_ps_addrs,
2586 struct ds *match)
2587 {
2588 if (!n_ps_addrs) {
2589 return;
2590 }
2591
2592 ds_put_format(match, " && %s == {", eth_addr_field);
2593
2594 for (size_t i = 0; i < n_ps_addrs; i++) {
2595 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
2596 }
2597 ds_chomp(match, ' ');
2598 ds_put_cstr(match, "}");
2599 }
2600
2601 static void
2602 build_port_security_ipv6_nd_flow(
2603 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
2604 int n_ipv6_addrs)
2605 {
2606 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
2607 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
2608 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
2609 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
2610 ETH_ADDR_ARGS(ea));
2611 if (!n_ipv6_addrs) {
2612 ds_put_cstr(match, "))");
2613 return;
2614 }
2615
2616 char ip6_str[INET6_ADDRSTRLEN + 1];
2617 struct in6_addr lla;
2618 in6_generate_lla(ea, &lla);
2619 memset(ip6_str, 0, sizeof(ip6_str));
2620 ipv6_string_mapped(ip6_str, &lla);
2621 ds_put_format(match, " && (nd.target == %s", ip6_str);
2622
2623 for(int i = 0; i < n_ipv6_addrs; i++) {
2624 memset(ip6_str, 0, sizeof(ip6_str));
2625 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2626 ds_put_format(match, " || nd.target == %s", ip6_str);
2627 }
2628
2629 ds_put_format(match, ")))");
2630 }
2631
2632 static void
2633 build_port_security_ipv6_flow(
2634 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
2635 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
2636 {
2637 char ip6_str[INET6_ADDRSTRLEN + 1];
2638
2639 ds_put_format(match, " && %s == {",
2640 pipeline == P_IN ? "ip6.src" : "ip6.dst");
2641
2642 /* Allow link-local address. */
2643 struct in6_addr lla;
2644 in6_generate_lla(ea, &lla);
2645 ipv6_string_mapped(ip6_str, &lla);
2646 ds_put_format(match, "%s, ", ip6_str);
2647
2648 /* Allow ip6.dst=ff00::/8 for multicast packets */
2649 if (pipeline == P_OUT) {
2650 ds_put_cstr(match, "ff00::/8, ");
2651 }
2652 for(int i = 0; i < n_ipv6_addrs; i++) {
2653 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2654 ds_put_format(match, "%s, ", ip6_str);
2655 }
2656 /* Replace ", " by "}". */
2657 ds_chomp(match, ' ');
2658 ds_chomp(match, ',');
2659 ds_put_cstr(match, "}");
2660 }
2661
2662 /**
2663 * Build port security constraints on ARP and IPv6 ND fields
2664 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2665 *
2666 * For each port security of the logical port, following
2667 * logical flows are added
2668 * - If the port security has no IP (both IPv4 and IPv6) or
2669 * if it has IPv4 address(es)
2670 * - Priority 90 flow to allow ARP packets for known MAC addresses
2671 * in the eth.src and arp.spa fields. If the port security
2672 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2673 *
2674 * - If the port security has no IP (both IPv4 and IPv6) or
2675 * if it has IPv6 address(es)
2676 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2677 * in the eth.src and nd.sll/nd.tll fields. If the port security
2678 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2679 * for IPv6 Neighbor Advertisement packet.
2680 *
2681 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2682 */
2683 static void
2684 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
2685 {
2686 struct ds match = DS_EMPTY_INITIALIZER;
2687
2688 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2689 struct lport_addresses *ps = &op->ps_addrs[i];
2690
2691 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
2692
2693 ds_clear(&match);
2694 if (ps->n_ipv4_addrs || no_ip) {
2695 ds_put_format(&match,
2696 "inport == %s && eth.src == %s && arp.sha == %s",
2697 op->json_key, ps->ea_s, ps->ea_s);
2698
2699 if (ps->n_ipv4_addrs) {
2700 ds_put_cstr(&match, " && arp.spa == {");
2701 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
2702 /* When the netmask is applied, if the host portion is
2703 * non-zero, the host can only use the specified
2704 * address in the arp.spa. If zero, the host is allowed
2705 * to use any address in the subnet. */
2706 if (ps->ipv4_addrs[j].plen == 32
2707 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
2708 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
2709 } else {
2710 ds_put_format(&match, "%s/%d",
2711 ps->ipv4_addrs[j].network_s,
2712 ps->ipv4_addrs[j].plen);
2713 }
2714 ds_put_cstr(&match, ", ");
2715 }
2716 ds_chomp(&match, ' ');
2717 ds_chomp(&match, ',');
2718 ds_put_cstr(&match, "}");
2719 }
2720 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2721 ds_cstr(&match), "next;");
2722 }
2723
2724 if (ps->n_ipv6_addrs || no_ip) {
2725 ds_clear(&match);
2726 ds_put_format(&match, "inport == %s && eth.src == %s",
2727 op->json_key, ps->ea_s);
2728 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
2729 ps->n_ipv6_addrs);
2730 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2731 ds_cstr(&match), "next;");
2732 }
2733 }
2734
2735 ds_clear(&match);
2736 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
2737 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
2738 ds_cstr(&match), "drop;");
2739 ds_destroy(&match);
2740 }
2741
2742 /**
2743 * Build port security constraints on IPv4 and IPv6 src and dst fields
2744 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2745 *
2746 * For each port security of the logical port, following
2747 * logical flows are added
2748 * - If the port security has IPv4 addresses,
2749 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2750 *
2751 * - If the port security has IPv6 addresses,
2752 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2753 *
2754 * - If the port security has IPv4 addresses or IPv6 addresses or both
2755 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2756 */
2757 static void
2758 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
2759 struct hmap *lflows)
2760 {
2761 char *port_direction;
2762 enum ovn_stage stage;
2763 if (pipeline == P_IN) {
2764 port_direction = "inport";
2765 stage = S_SWITCH_IN_PORT_SEC_IP;
2766 } else {
2767 port_direction = "outport";
2768 stage = S_SWITCH_OUT_PORT_SEC_IP;
2769 }
2770
2771 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2772 struct lport_addresses *ps = &op->ps_addrs[i];
2773
2774 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
2775 continue;
2776 }
2777
2778 if (ps->n_ipv4_addrs) {
2779 struct ds match = DS_EMPTY_INITIALIZER;
2780 if (pipeline == P_IN) {
2781 /* Permit use of the unspecified address for DHCP discovery */
2782 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
2783 ds_put_format(&dhcp_match, "inport == %s"
2784 " && eth.src == %s"
2785 " && ip4.src == 0.0.0.0"
2786 " && ip4.dst == 255.255.255.255"
2787 " && udp.src == 68 && udp.dst == 67",
2788 op->json_key, ps->ea_s);
2789 ovn_lflow_add(lflows, op->od, stage, 90,
2790 ds_cstr(&dhcp_match), "next;");
2791 ds_destroy(&dhcp_match);
2792 ds_put_format(&match, "inport == %s && eth.src == %s"
2793 " && ip4.src == {", op->json_key,
2794 ps->ea_s);
2795 } else {
2796 ds_put_format(&match, "outport == %s && eth.dst == %s"
2797 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2798 op->json_key, ps->ea_s);
2799 }
2800
2801 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
2802 ovs_be32 mask = ps->ipv4_addrs[j].mask;
2803 /* When the netmask is applied, if the host portion is
2804 * non-zero, the host can only use the specified
2805 * address. If zero, the host is allowed to use any
2806 * address in the subnet.
2807 */
2808 if (ps->ipv4_addrs[j].plen == 32
2809 || ps->ipv4_addrs[j].addr & ~mask) {
2810 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
2811 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
2812 /* Host is also allowed to receive packets to the
2813 * broadcast address in the specified subnet. */
2814 ds_put_format(&match, ", %s",
2815 ps->ipv4_addrs[j].bcast_s);
2816 }
2817 } else {
2818 /* host portion is zero */
2819 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
2820 ps->ipv4_addrs[j].plen);
2821 }
2822 ds_put_cstr(&match, ", ");
2823 }
2824
2825 /* Replace ", " by "}". */
2826 ds_chomp(&match, ' ');
2827 ds_chomp(&match, ',');
2828 ds_put_cstr(&match, "}");
2829 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
2830 ds_destroy(&match);
2831 }
2832
2833 if (ps->n_ipv6_addrs) {
2834 struct ds match = DS_EMPTY_INITIALIZER;
2835 if (pipeline == P_IN) {
2836 /* Permit use of unspecified address for duplicate address
2837 * detection */
2838 struct ds dad_match = DS_EMPTY_INITIALIZER;
2839 ds_put_format(&dad_match, "inport == %s"
2840 " && eth.src == %s"
2841 " && ip6.src == ::"
2842 " && ip6.dst == ff02::/16"
2843 " && icmp6.type == {131, 135, 143}", op->json_key,
2844 ps->ea_s);
2845 ovn_lflow_add(lflows, op->od, stage, 90,
2846 ds_cstr(&dad_match), "next;");
2847 ds_destroy(&dad_match);
2848 }
2849 ds_put_format(&match, "%s == %s && %s == %s",
2850 port_direction, op->json_key,
2851 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
2852 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
2853 ps->ipv6_addrs, ps->n_ipv6_addrs);
2854 ovn_lflow_add(lflows, op->od, stage, 90,
2855 ds_cstr(&match), "next;");
2856 ds_destroy(&match);
2857 }
2858
2859 char *match = xasprintf("%s == %s && %s == %s && ip",
2860 port_direction, op->json_key,
2861 pipeline == P_IN ? "eth.src" : "eth.dst",
2862 ps->ea_s);
2863 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2864 free(match);
2865 }
2866
2867 }
2868
2869 static bool
2870 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2871 {
2872 return !lsp->enabled || *lsp->enabled;
2873 }
2874
2875 static bool
2876 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2877 {
2878 return !lsp->up || *lsp->up;
2879 }
2880
2881 static bool
2882 build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
2883 struct ds *options_action, struct ds *response_action,
2884 struct ds *ipv4_addr_match)
2885 {
2886 if (!op->nbsp->dhcpv4_options) {
2887 /* CMS has disabled native DHCPv4 for this lport. */
2888 return false;
2889 }
2890
2891 ovs_be32 host_ip, mask;
2892 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2893 &mask);
2894 if (error || ((offer_ip ^ host_ip) & mask)) {
2895 /* Either
2896 * - cidr defined is invalid or
2897 * - the offer ip of the logical port doesn't belong to the cidr
2898 * defined in the DHCPv4 options.
2899 * */
2900 free(error);
2901 return false;
2902 }
2903
2904 const char *server_ip = smap_get(
2905 &op->nbsp->dhcpv4_options->options, "server_id");
2906 const char *server_mac = smap_get(
2907 &op->nbsp->dhcpv4_options->options, "server_mac");
2908 const char *lease_time = smap_get(
2909 &op->nbsp->dhcpv4_options->options, "lease_time");
2910
2911 if (!(server_ip && server_mac && lease_time)) {
2912 /* "server_id", "server_mac" and "lease_time" should be
2913 * present in the dhcp_options. */
2914 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2915 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2916 op->json_key);
2917 return false;
2918 }
2919
2920 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2921 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2922
2923 /* server_mac is not DHCPv4 option, delete it from the smap. */
2924 smap_remove(&dhcpv4_options, "server_mac");
2925 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2926 smap_add(&dhcpv4_options, "netmask", netmask);
2927 free(netmask);
2928
2929 ds_put_format(options_action,
2930 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2931 IP_FMT", ", IP_ARGS(offer_ip));
2932
2933 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2934 * options on different architectures (big or little endian, SSE4.2) */
2935 const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
2936 for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
2937 const struct smap_node *node = sorted_opts[i];
2938 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2939 }
2940 free(sorted_opts);
2941
2942 ds_chomp(options_action, ' ');
2943 ds_chomp(options_action, ',');
2944 ds_put_cstr(options_action, "); next;");
2945
2946 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2947 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
2948 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2949 "output;",
2950 server_mac, IP_ARGS(offer_ip), server_ip);
2951
2952 ds_put_format(ipv4_addr_match,
2953 "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
2954 IP_ARGS(offer_ip), server_ip);
2955 smap_destroy(&dhcpv4_options);
2956 return true;
2957 }
2958
2959 static bool
2960 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2961 struct ds *options_action, struct ds *response_action)
2962 {
2963 if (!op->nbsp->dhcpv6_options) {
2964 /* CMS has disabled native DHCPv6 for this lport. */
2965 return false;
2966 }
2967
2968 struct in6_addr host_ip, mask;
2969
2970 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2971 &mask);
2972 if (error) {
2973 free(error);
2974 return false;
2975 }
2976 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2977 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2978 if (!ipv6_mask_is_any(&ip6_mask)) {
2979 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2980 * options.*/
2981 return false;
2982 }
2983
2984 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
2985 /* "server_id" should be the MAC address. */
2986 const char *server_mac = smap_get(options_map, "server_id");
2987 struct eth_addr ea;
2988 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2989 /* "server_id" should be present in the dhcpv6_options. */
2990 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2991 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2992 " for lport %s", op->json_key);
2993 return false;
2994 }
2995
2996 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2997 struct in6_addr lla;
2998 in6_generate_lla(ea, &lla);
2999
3000 char server_ip[INET6_ADDRSTRLEN + 1];
3001 ipv6_string_mapped(server_ip, &lla);
3002
3003 char ia_addr[INET6_ADDRSTRLEN + 1];
3004 ipv6_string_mapped(ia_addr, offer_ip);
3005
3006 ds_put_format(options_action,
3007 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
3008
3009 /* Check whether the dhcpv6 options should be configured as stateful.
3010 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
3011 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
3012 ipv6_string_mapped(ia_addr, offer_ip);
3013 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
3014 }
3015
3016 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
3017 * options on different architectures (big or little endian, SSE4.2) */
3018 const struct smap_node **sorted_opts = smap_sort(options_map);
3019 for (size_t i = 0; i < smap_count(options_map); i++) {
3020 const struct smap_node *node = sorted_opts[i];
3021 if (strcmp(node->key, "dhcpv6_stateless")) {
3022 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
3023 }
3024 }
3025 free(sorted_opts);
3026
3027 ds_chomp(options_action, ' ');
3028 ds_chomp(options_action, ',');
3029 ds_put_cstr(options_action, "); next;");
3030
3031 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
3032 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
3033 "udp.dst = 546; outport = inport; flags.loopback = 1; "
3034 "output;",
3035 server_mac, server_ip);
3036
3037 return true;
3038 }
3039
3040 struct ovn_port_group_ls {
3041 struct hmap_node key_node; /* Index on 'key'. */
3042 struct uuid key; /* nb_ls->header_.uuid. */
3043 const struct nbrec_logical_switch *nb_ls;
3044 };
3045
3046 struct ovn_port_group {
3047 struct hmap_node key_node; /* Index on 'key'. */
3048 struct uuid key; /* nb_pg->header_.uuid. */
3049 const struct nbrec_port_group *nb_pg;
3050 struct hmap nb_lswitches; /* NB lswitches related to the port group */
3051 };
3052
3053 static void
3054 ovn_port_group_ls_add(struct ovn_port_group *pg,
3055 const struct nbrec_logical_switch *nb_ls)
3056 {
3057 struct ovn_port_group_ls *pg_ls = xzalloc(sizeof *pg_ls);
3058 pg_ls->key = nb_ls->header_.uuid;
3059 pg_ls->nb_ls = nb_ls;
3060 hmap_insert(&pg->nb_lswitches, &pg_ls->key_node, uuid_hash(&pg_ls->key));
3061 }
3062
3063 static struct ovn_port_group_ls *
3064 ovn_port_group_ls_find(struct ovn_port_group *pg, const struct uuid *ls_uuid)
3065 {
3066 struct ovn_port_group_ls *pg_ls;
3067
3068 HMAP_FOR_EACH_WITH_HASH (pg_ls, key_node, uuid_hash(ls_uuid),
3069 &pg->nb_lswitches) {
3070 if (uuid_equals(ls_uuid, &pg_ls->key)) {
3071 return pg_ls;
3072 }
3073 }
3074 return NULL;
3075 }
3076
3077 struct ovn_ls_port_group {
3078 struct hmap_node key_node; /* Index on 'key'. */
3079 struct uuid key; /* nb_pg->header_.uuid. */
3080 const struct nbrec_port_group *nb_pg;
3081 };
3082
3083 static void
3084 ovn_ls_port_group_add(struct hmap *nb_pgs,
3085 const struct nbrec_port_group *nb_pg)
3086 {
3087 struct ovn_ls_port_group *ls_pg = xzalloc(sizeof *ls_pg);
3088 ls_pg->key = nb_pg->header_.uuid;
3089 ls_pg->nb_pg = nb_pg;
3090 hmap_insert(nb_pgs, &ls_pg->key_node, uuid_hash(&ls_pg->key));
3091 }
3092
3093 static void
3094 ovn_ls_port_group_destroy(struct hmap *nb_pgs)
3095 {
3096 struct ovn_ls_port_group *ls_pg;
3097 HMAP_FOR_EACH_POP (ls_pg, key_node, nb_pgs) {
3098 free(ls_pg);
3099 }
3100 hmap_destroy(nb_pgs);
3101 }
3102
3103 static bool
3104 has_stateful_acl(struct ovn_datapath *od)
3105 {
3106 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3107 struct nbrec_acl *acl = od->nbs->acls[i];
3108 if (!strcmp(acl->action, "allow-related")) {
3109 return true;
3110 }
3111 }
3112
3113 struct ovn_ls_port_group *ls_pg;
3114 HMAP_FOR_EACH (ls_pg, key_node, &od->nb_pgs) {
3115 for (size_t i = 0; i < ls_pg->nb_pg->n_acls; i++) {
3116 struct nbrec_acl *acl = ls_pg->nb_pg->acls[i];
3117 if (!strcmp(acl->action, "allow-related")) {
3118 return true;
3119 }
3120 }
3121 }
3122
3123 return false;
3124 }
3125
3126 static void
3127 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
3128 {
3129 bool has_stateful = has_stateful_acl(od);
3130
3131 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
3132 * allowed by default. */
3133 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
3134 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
3135
3136 /* If there are any stateful ACL rules in this datapath, we must
3137 * send all IP packets through the conntrack action, which handles
3138 * defragmentation, in order to match L4 headers. */
3139 if (has_stateful) {
3140 for (size_t i = 0; i < od->n_router_ports; i++) {
3141 struct ovn_port *op = od->router_ports[i];
3142 /* Can't use ct() for router ports. Consider the
3143 * following configuration: lp1(10.0.0.2) on
3144 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
3145 * ping from lp1 to lp2, First, the response will go
3146 * through ct() with a zone for lp2 in the ls2 ingress
3147 * pipeline on hostB. That ct zone knows about this
3148 * connection. Next, it goes through ct() with the zone
3149 * for the router port in the egress pipeline of ls2 on
3150 * hostB. This zone does not know about the connection,
3151 * as the icmp request went through the logical router
3152 * on hostA, not hostB. This would only work with
3153 * distributed conntrack state across all chassis. */
3154 struct ds match_in = DS_EMPTY_INITIALIZER;
3155 struct ds match_out = DS_EMPTY_INITIALIZER;
3156
3157 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
3158 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
3159 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3160 ds_cstr(&match_in), "next;");
3161 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3162 ds_cstr(&match_out), "next;");
3163
3164 ds_destroy(&match_in);
3165 ds_destroy(&match_out);
3166 }
3167 if (od->localnet_port) {
3168 struct ds match_in = DS_EMPTY_INITIALIZER;
3169 struct ds match_out = DS_EMPTY_INITIALIZER;
3170
3171 ds_put_format(&match_in, "ip && inport == %s",
3172 od->localnet_port->json_key);
3173 ds_put_format(&match_out, "ip && outport == %s",
3174 od->localnet_port->json_key);
3175 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3176 ds_cstr(&match_in), "next;");
3177 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3178 ds_cstr(&match_out), "next;");
3179
3180 ds_destroy(&match_in);
3181 ds_destroy(&match_out);
3182 }
3183
3184 /* Ingress and Egress Pre-ACL Table (Priority 110).
3185 *
3186 * Not to do conntrack on ND and ICMP destination
3187 * unreachable packets. */
3188 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
3189 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
3190 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
3191 "next;");
3192 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
3193 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
3194 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
3195 "next;");
3196
3197 /* Ingress and Egress Pre-ACL Table (Priority 100).
3198 *
3199 * Regardless of whether the ACL is "from-lport" or "to-lport",
3200 * we need rules in both the ingress and egress table, because
3201 * the return traffic needs to be followed.
3202 *
3203 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3204 * it to conntrack for tracking and defragmentation. */
3205 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
3206 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3207 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
3208 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3209 }
3210 }
3211
3212 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
3213 * 'ip_address'. The caller must free() the memory allocated for
3214 * 'ip_address'. */
3215 static void
3216 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
3217 uint16_t *port, int *addr_family)
3218 {
3219 struct sockaddr_storage ss;
3220 if (!inet_parse_active(key, 0, &ss)) {
3221 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3222 VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
3223 key);
3224 return;
3225 }
3226
3227 struct ds s = DS_EMPTY_INITIALIZER;
3228 ss_format_address_nobracks(&ss, &s);
3229 *ip_address = ds_steal_cstr(&s);
3230
3231 *port = ss_get_port(&ss);
3232
3233 *addr_family = ss.ss_family;
3234 }
3235
3236 /*
3237 * Returns true if logical switch is configured with DNS records, false
3238 * otherwise.
3239 */
3240 static bool
3241 ls_has_dns_records(const struct nbrec_logical_switch *nbs)
3242 {
3243 for (size_t i = 0; i < nbs->n_dns_records; i++) {
3244 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
3245 return true;
3246 }
3247 }
3248
3249 return false;
3250 }
3251
3252 static void
3253 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
3254 {
3255 /* Do not send ND packets to conntrack */
3256 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
3257 "nd || nd_rs || nd_ra", "next;");
3258 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
3259 "nd || nd_rs || nd_ra", "next;");
3260
3261 /* Allow all packets to go to next tables by default. */
3262 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
3263 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
3264
3265 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3266 bool vip_configured = false;
3267 int addr_family = AF_INET;
3268 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3269 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3270 struct smap *vips = &lb->vips;
3271 struct smap_node *node;
3272
3273 SMAP_FOR_EACH (node, vips) {
3274 vip_configured = true;
3275
3276 /* node->key contains IP:port or just IP. */
3277 char *ip_address = NULL;
3278 uint16_t port;
3279 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3280 &addr_family);
3281 if (!ip_address) {
3282 continue;
3283 }
3284
3285 if (!sset_contains(&all_ips, ip_address)) {
3286 sset_add(&all_ips, ip_address);
3287 }
3288
3289 free(ip_address);
3290
3291 /* Ignore L4 port information in the key because fragmented packets
3292 * may not have L4 information. The pre-stateful table will send
3293 * the packet through ct() action to de-fragment. In stateful
3294 * table, we will eventually look at L4 information. */
3295 }
3296 }
3297
3298 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3299 * packet to conntrack for defragmentation. */
3300 const char *ip_address;
3301 SSET_FOR_EACH(ip_address, &all_ips) {
3302 char *match;
3303
3304 if (addr_family == AF_INET) {
3305 match = xasprintf("ip && ip4.dst == %s", ip_address);
3306 } else {
3307 match = xasprintf("ip && ip6.dst == %s", ip_address);
3308 }
3309 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
3310 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3311 free(match);
3312 }
3313
3314 sset_destroy(&all_ips);
3315
3316 if (vip_configured) {
3317 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
3318 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3319 }
3320 }
3321
3322 static void
3323 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
3324 {
3325 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3326 * allowed by default. */
3327 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
3328 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
3329
3330 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3331 * sent to conntrack for tracking and defragmentation. */
3332 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
3333 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3334 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
3335 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3336 }
3337
3338 static void
3339 build_acl_log(struct ds *actions, const struct nbrec_acl *acl)
3340 {
3341 if (!acl->log) {
3342 return;
3343 }
3344
3345 ds_put_cstr(actions, "log(");
3346
3347 if (acl->name) {
3348 ds_put_format(actions, "name=\"%s\", ", acl->name);
3349 }
3350
3351 /* If a severity level isn't specified, default to "info". */
3352 if (acl->severity) {
3353 ds_put_format(actions, "severity=%s, ", acl->severity);
3354 } else {
3355 ds_put_format(actions, "severity=info, ");
3356 }
3357
3358 if (!strcmp(acl->action, "drop")) {
3359 ds_put_cstr(actions, "verdict=drop, ");
3360 } else if (!strcmp(acl->action, "reject")) {
3361 ds_put_cstr(actions, "verdict=reject, ");
3362 } else if (!strcmp(acl->action, "allow")
3363 || !strcmp(acl->action, "allow-related")) {
3364 ds_put_cstr(actions, "verdict=allow, ");
3365 }
3366
3367 if (acl->meter) {
3368 ds_put_format(actions, "meter=\"%s\", ", acl->meter);
3369 }
3370
3371 ds_chomp(actions, ' ');
3372 ds_chomp(actions, ',');
3373 ds_put_cstr(actions, "); ");
3374 }
3375
3376 static void
3377 build_reject_acl_rules(struct ovn_datapath *od, struct hmap *lflows,
3378 enum ovn_stage stage, struct nbrec_acl *acl,
3379 struct ds *extra_match, struct ds *extra_actions)
3380 {
3381 struct ds match = DS_EMPTY_INITIALIZER;
3382 struct ds actions = DS_EMPTY_INITIALIZER;
3383 bool ingress = (stage == S_SWITCH_IN_ACL);
3384
3385 /* TCP */
3386 build_acl_log(&actions, acl);
3387 if (extra_match->length > 0) {
3388 ds_put_format(&match, "(%s) && ", extra_match->string);
3389 }
3390 ds_put_format(&match, "ip4 && tcp && (%s)", acl->match);
3391 ds_put_format(&actions, "reg0 = 0; "
3392 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3393 "tcp_reset { outport <-> inport; %s };",
3394 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3395 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
3396 ds_cstr(&match), ds_cstr(&actions));
3397 ds_clear(&match);
3398 ds_clear(&actions);
3399 build_acl_log(&actions, acl);
3400 if (extra_match->length > 0) {
3401 ds_put_format(&match, "(%s) && ", extra_match->string);
3402 }
3403 ds_put_format(&match, "ip6 && tcp && (%s)", acl->match);
3404 ds_put_format(&actions, "reg0 = 0; "
3405 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3406 "tcp_reset { outport <-> inport; %s };",
3407 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3408 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET + 10,
3409 ds_cstr(&match), ds_cstr(&actions));
3410
3411 /* IP traffic */
3412 ds_clear(&match);
3413 ds_clear(&actions);
3414 build_acl_log(&actions, acl);
3415 if (extra_match->length > 0) {
3416 ds_put_format(&match, "(%s) && ", extra_match->string);
3417 }
3418 ds_put_format(&match, "ip4 && (%s)", acl->match);
3419 if (extra_actions->length > 0) {
3420 ds_put_format(&actions, "%s ", extra_actions->string);
3421 }
3422 ds_put_format(&actions, "reg0 = 0; "
3423 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3424 "icmp4 { outport <-> inport; %s };",
3425 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3426 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
3427 ds_cstr(&match), ds_cstr(&actions));
3428 ds_clear(&match);
3429 ds_clear(&actions);
3430 build_acl_log(&actions, acl);
3431 if (extra_match->length > 0) {
3432 ds_put_format(&match, "(%s) && ", extra_match->string);
3433 }
3434 ds_put_format(&match, "ip6 && (%s)", acl->match);
3435 if (extra_actions->length > 0) {
3436 ds_put_format(&actions, "%s ", extra_actions->string);
3437 }
3438 ds_put_format(&actions, "reg0 = 0; icmp6 { "
3439 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3440 "outport <-> inport; %s };",
3441 ingress ? "output;" : "next(pipeline=ingress,table=0);");
3442 ovn_lflow_add(lflows, od, stage, acl->priority + OVN_ACL_PRI_OFFSET,
3443 ds_cstr(&match), ds_cstr(&actions));
3444
3445 ds_destroy(&match);
3446 ds_destroy(&actions);
3447 }
3448
3449 static void
3450 consider_acl(struct hmap *lflows, struct ovn_datapath *od,
3451 struct nbrec_acl *acl, bool has_stateful)
3452 {
3453 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
3454 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
3455
3456 char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
3457 if (!strcmp(acl->action, "allow")
3458 || !strcmp(acl->action, "allow-related")) {
3459 /* If there are any stateful flows, we must even commit "allow"
3460 * actions. This is because, while the initiater's
3461 * direction may not have any stateful rules, the server's
3462 * may and then its return traffic would not have an
3463 * associated conntrack entry and would return "+invalid". */
3464 if (!has_stateful) {
3465 struct ds actions = DS_EMPTY_INITIALIZER;
3466 build_acl_log(&actions, acl);
3467 ds_put_cstr(&actions, "next;");
3468 ovn_lflow_add_with_hint(lflows, od, stage,
3469 acl->priority + OVN_ACL_PRI_OFFSET,
3470 acl->match, ds_cstr(&actions),
3471 stage_hint);
3472 ds_destroy(&actions);
3473 } else {
3474 struct ds match = DS_EMPTY_INITIALIZER;
3475 struct ds actions = DS_EMPTY_INITIALIZER;
3476
3477 /* Commit the connection tracking entry if it's a new
3478 * connection that matches this ACL. After this commit,
3479 * the reply traffic is allowed by a flow we create at
3480 * priority 65535, defined earlier.
3481 *
3482 * It's also possible that a known connection was marked for
3483 * deletion after a policy was deleted, but the policy was
3484 * re-added while that connection is still known. We catch
3485 * that case here and un-set ct_label.blocked (which will be done
3486 * by ct_commit in the "stateful" stage) to indicate that the
3487 * connection should be allowed to resume.
3488 */
3489 ds_put_format(&match, "((ct.new && !ct.est)"
3490 " || (!ct.new && ct.est && !ct.rpl "
3491 "&& ct_label.blocked == 1)) "
3492 "&& (%s)", acl->match);
3493 ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
3494 build_acl_log(&actions, acl);
3495 ds_put_cstr(&actions, "next;");
3496 ovn_lflow_add_with_hint(lflows, od, stage,
3497 acl->priority + OVN_ACL_PRI_OFFSET,
3498 ds_cstr(&match),
3499 ds_cstr(&actions),
3500 stage_hint);
3501
3502 /* Match on traffic in the request direction for an established
3503 * connection tracking entry that has not been marked for
3504 * deletion. There is no need to commit here, so we can just
3505 * proceed to the next table. We use this to ensure that this
3506 * connection is still allowed by the currently defined
3507 * policy. */
3508 ds_clear(&match);
3509 ds_clear(&actions);
3510 ds_put_format(&match,
3511 "!ct.new && ct.est && !ct.rpl"
3512 " && ct_label.blocked == 0 && (%s)",
3513 acl->match);
3514
3515 build_acl_log(&actions, acl);
3516 ds_put_cstr(&actions, "next;");
3517 ovn_lflow_add_with_hint(lflows, od, stage,
3518 acl->priority + OVN_ACL_PRI_OFFSET,
3519 ds_cstr(&match), ds_cstr(&actions),
3520 stage_hint);
3521
3522 ds_destroy(&match);
3523 ds_destroy(&actions);
3524 }
3525 } else if (!strcmp(acl->action, "drop")
3526 || !strcmp(acl->action, "reject")) {
3527 struct ds match = DS_EMPTY_INITIALIZER;
3528 struct ds actions = DS_EMPTY_INITIALIZER;
3529
3530 /* The implementation of "drop" differs if stateful ACLs are in
3531 * use for this datapath. In that case, the actions differ
3532 * depending on whether the connection was previously committed
3533 * to the connection tracker with ct_commit. */
3534 if (has_stateful) {
3535 /* If the packet is not part of an established connection, then
3536 * we can simply reject/drop it. */
3537 ds_put_cstr(&match,
3538 "(!ct.est || (ct.est && ct_label.blocked == 1))");
3539 if (!strcmp(acl->action, "reject")) {
3540 build_reject_acl_rules(od, lflows, stage, acl, &match,
3541 &actions);
3542 } else {
3543 ds_put_format(&match, " && (%s)", acl->match);
3544 build_acl_log(&actions, acl);
3545 ds_put_cstr(&actions, "/* drop */");
3546 ovn_lflow_add(lflows, od, stage,
3547 acl->priority + OVN_ACL_PRI_OFFSET,
3548 ds_cstr(&match), ds_cstr(&actions));
3549 }
3550 /* For an existing connection without ct_label set, we've
3551 * encountered a policy change. ACLs previously allowed
3552 * this connection and we committed the connection tracking
3553 * entry. Current policy says that we should drop this
3554 * connection. First, we set bit 0 of ct_label to indicate
3555 * that this connection is set for deletion. By not
3556 * specifying "next;", we implicitly drop the packet after
3557 * updating conntrack state. We would normally defer
3558 * ct_commit() to the "stateful" stage, but since we're
3559 * rejecting/dropping the packet, we go ahead and do it here.
3560 */
3561 ds_clear(&match);
3562 ds_clear(&actions);
3563 ds_put_cstr(&match, "ct.est && ct_label.blocked == 0");
3564 ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
3565 if (!strcmp(acl->action, "reject")) {
3566 build_reject_acl_rules(od, lflows, stage, acl, &match,
3567 &actions);
3568 } else {
3569 ds_put_format(&match, " && (%s)", acl->match);
3570 build_acl_log(&actions, acl);
3571 ds_put_cstr(&actions, "/* drop */");
3572 ovn_lflow_add(lflows, od, stage,
3573 acl->priority + OVN_ACL_PRI_OFFSET,
3574 ds_cstr(&match), ds_cstr(&actions));
3575 }
3576 } else {
3577 /* There are no stateful ACLs in use on this datapath,
3578 * so a "reject/drop" ACL is simply the "reject/drop"
3579 * logical flow action in all cases. */
3580 if (!strcmp(acl->action, "reject")) {
3581 build_reject_acl_rules(od, lflows, stage, acl, &match,
3582 &actions);
3583 } else {
3584 build_acl_log(&actions, acl);
3585 ds_put_cstr(&actions, "/* drop */");
3586 ovn_lflow_add(lflows, od, stage,
3587 acl->priority + OVN_ACL_PRI_OFFSET,
3588 acl->match, ds_cstr(&actions));
3589 }
3590 }
3591 ds_destroy(&match);
3592 ds_destroy(&actions);
3593 }
3594 free(stage_hint);
3595 }
3596
3597 static struct ovn_port_group *
3598 ovn_port_group_create(struct hmap *pgs,
3599 const struct nbrec_port_group *nb_pg)
3600 {
3601 struct ovn_port_group *pg = xzalloc(sizeof *pg);
3602 pg->key = nb_pg->header_.uuid;
3603 pg->nb_pg = nb_pg;
3604 hmap_init(&pg->nb_lswitches);
3605 hmap_insert(pgs, &pg->key_node, uuid_hash(&pg->key));
3606 return pg;
3607 }
3608
3609 static void
3610 ovn_port_group_destroy(struct hmap *pgs, struct ovn_port_group *pg)
3611 {
3612 if (pg) {
3613 hmap_remove(pgs, &pg->key_node);
3614 struct ovn_port_group_ls *ls;
3615 HMAP_FOR_EACH_POP (ls, key_node, &pg->nb_lswitches) {
3616 free(ls);
3617 }
3618 hmap_destroy(&pg->nb_lswitches);
3619 free(pg);
3620 }
3621 }
3622
3623 static void
3624 build_port_group_lswitches(struct northd_context *ctx, struct hmap *pgs,
3625 struct hmap *ports)
3626 {
3627 hmap_init(pgs);
3628
3629 const struct nbrec_port_group *nb_pg;
3630 NBREC_PORT_GROUP_FOR_EACH (nb_pg, ctx->ovnnb_idl) {
3631 struct ovn_port_group *pg = ovn_port_group_create(pgs, nb_pg);
3632 for (size_t i = 0; i < nb_pg->n_ports; i++) {
3633 struct ovn_port *op = ovn_port_find(ports, nb_pg->ports[i]->name);
3634 if (!op) {
3635 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3636 VLOG_ERR_RL(&rl, "lport %s in port group %s not found.",
3637 nb_pg->ports[i]->name,
3638 nb_pg->name);
3639 continue;
3640 }
3641
3642 if (!op->od->nbs) {
3643 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3644 VLOG_WARN_RL(&rl, "lport %s in port group %s has no lswitch.",
3645 nb_pg->ports[i]->name,
3646 nb_pg->name);
3647 continue;
3648 }
3649
3650 struct ovn_port_group_ls *pg_ls =
3651 ovn_port_group_ls_find(pg, &op->od->nbs->header_.uuid);
3652 if (!pg_ls) {
3653 ovn_port_group_ls_add(pg, op->od->nbs);
3654 ovn_ls_port_group_add(&op->od->nb_pgs, nb_pg);
3655 }
3656 }
3657 }
3658 }
3659
3660 static void
3661 build_acls(struct ovn_datapath *od, struct hmap *lflows,
3662 struct hmap *port_groups)
3663 {
3664 bool has_stateful = has_stateful_acl(od);
3665
3666 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
3667 * default. A related rule at priority 1 is added below if there
3668 * are any stateful ACLs in this datapath. */
3669 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
3670 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
3671
3672 if (has_stateful) {
3673 /* Ingress and Egress ACL Table (Priority 1).
3674 *
3675 * By default, traffic is allowed. This is partially handled by
3676 * the Priority 0 ACL flows added earlier, but we also need to
3677 * commit IP flows. This is because, while the initiater's
3678 * direction may not have any stateful rules, the server's may
3679 * and then its return traffic would not have an associated
3680 * conntrack entry and would return "+invalid".
3681 *
3682 * We use "ct_commit" for a connection that is not already known
3683 * by the connection tracker. Once a connection is committed,
3684 * subsequent packets will hit the flow at priority 0 that just
3685 * uses "next;"
3686 *
3687 * We also check for established connections that have ct_label.blocked
3688 * set on them. That's a connection that was disallowed, but is
3689 * now allowed by policy again since it hit this default-allow flow.
3690 * We need to set ct_label.blocked=0 to let the connection continue,
3691 * which will be done by ct_commit() in the "stateful" stage.
3692 * Subsequent packets will hit the flow at priority 0 that just
3693 * uses "next;". */
3694 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
3695 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3696 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3697 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
3698 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3699 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3700
3701 /* Ingress and Egress ACL Table (Priority 65535).
3702 *
3703 * Always drop traffic that's in an invalid state. Also drop
3704 * reply direction packets for connections that have been marked
3705 * for deletion (bit 0 of ct_label is set).
3706 *
3707 * This is enforced at a higher priority than ACLs can be defined. */
3708 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3709 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3710 "drop;");
3711 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3712 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3713 "drop;");
3714
3715 /* Ingress and Egress ACL Table (Priority 65535).
3716 *
3717 * Allow reply traffic that is part of an established
3718 * conntrack entry that has not been marked for deletion
3719 * (bit 0 of ct_label). We only match traffic in the
3720 * reply direction because we want traffic in the request
3721 * direction to hit the currently defined policy from ACLs.
3722 *
3723 * This is enforced at a higher priority than ACLs can be defined. */
3724 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3725 "ct.est && !ct.rel && !ct.new && !ct.inv "
3726 "&& ct.rpl && ct_label.blocked == 0",
3727 "next;");
3728 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3729 "ct.est && !ct.rel && !ct.new && !ct.inv "
3730 "&& ct.rpl && ct_label.blocked == 0",
3731 "next;");
3732
3733 /* Ingress and Egress ACL Table (Priority 65535).
3734 *
3735 * Allow traffic that is related to an existing conntrack entry that
3736 * has not been marked for deletion (bit 0 of ct_label).
3737 *
3738 * This is enforced at a higher priority than ACLs can be defined.
3739 *
3740 * NOTE: This does not support related data sessions (eg,
3741 * a dynamically negotiated FTP data channel), but will allow
3742 * related traffic such as an ICMP Port Unreachable through
3743 * that's generated from a non-listening UDP port. */
3744 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
3745 "!ct.est && ct.rel && !ct.new && !ct.inv "
3746 "&& ct_label.blocked == 0",
3747 "next;");
3748 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
3749 "!ct.est && ct.rel && !ct.new && !ct.inv "
3750 "&& ct_label.blocked == 0",
3751 "next;");
3752
3753 /* Ingress and Egress ACL Table (Priority 65535).
3754 *
3755 * Not to do conntrack on ND packets. */
3756 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
3757 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
3758 }
3759
3760 /* Ingress or Egress ACL Table (Various priorities). */
3761 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3762 struct nbrec_acl *acl = od->nbs->acls[i];
3763 consider_acl(lflows, od, acl, has_stateful);
3764 }
3765 struct ovn_port_group *pg;
3766 HMAP_FOR_EACH (pg, key_node, port_groups) {
3767 if (ovn_port_group_ls_find(pg, &od->nbs->header_.uuid)) {
3768 for (size_t i = 0; i < pg->nb_pg->n_acls; i++) {
3769 consider_acl(lflows, od, pg->nb_pg->acls[i], has_stateful);
3770 }
3771 }
3772 }
3773
3774 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
3775 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3776 * */
3777 for (size_t i = 0; i < od->nbs->n_ports; i++) {
3778 if (od->nbs->ports[i]->dhcpv4_options) {
3779 const char *server_id = smap_get(
3780 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
3781 const char *server_mac = smap_get(
3782 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
3783 const char *lease_time = smap_get(
3784 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
3785 if (server_id && server_mac && lease_time) {
3786 struct ds match = DS_EMPTY_INITIALIZER;
3787 const char *actions =
3788 has_stateful ? "ct_commit; next;" : "next;";
3789 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3790 "&& ip4.src == %s && udp && udp.src == 67 "
3791 "&& udp.dst == 68", od->nbs->ports[i]->name,
3792 server_mac, server_id);
3793 ovn_lflow_add(
3794 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3795 actions);
3796 ds_destroy(&match);
3797 }
3798 }
3799
3800 if (od->nbs->ports[i]->dhcpv6_options) {
3801 const char *server_mac = smap_get(
3802 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
3803 struct eth_addr ea;
3804 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
3805 /* Get the link local IP of the DHCPv6 server from the
3806 * server MAC. */
3807 struct in6_addr lla;
3808 in6_generate_lla(ea, &lla);
3809
3810 char server_ip[INET6_ADDRSTRLEN + 1];
3811 ipv6_string_mapped(server_ip, &lla);
3812
3813 struct ds match = DS_EMPTY_INITIALIZER;
3814 const char *actions = has_stateful ? "ct_commit; next;" :
3815 "next;";
3816 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3817 "&& ip6.src == %s && udp && udp.src == 547 "
3818 "&& udp.dst == 546", od->nbs->ports[i]->name,
3819 server_mac, server_ip);
3820 ovn_lflow_add(
3821 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3822 actions);
3823 ds_destroy(&match);
3824 }
3825 }
3826 }
3827
3828 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3829 * if the CMS has configured DNS records for the datapath.
3830 */
3831 if (ls_has_dns_records(od->nbs)) {
3832 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
3833 ovn_lflow_add(
3834 lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
3835 actions);
3836 }
3837 }
3838
3839 static void
3840 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3841 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3842 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3843 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_METER, 0, "1", "next;");
3844 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_METER, 0, "1", "next;");
3845
3846 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3847 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3848 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3849 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3850 int64_t rate = 0;
3851 int64_t burst = 0;
3852
3853 for (size_t j = 0; j < qos->n_action; j++) {
3854 if (!strcmp(qos->key_action[j], "dscp")) {
3855 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3856
3857 ds_put_format(&dscp_action, "ip.dscp = %"PRId64"; next;",
3858 qos->value_action[j]);
3859 ovn_lflow_add(lflows, od, stage,
3860 qos->priority,
3861 qos->match, ds_cstr(&dscp_action));
3862 ds_destroy(&dscp_action);
3863 }
3864 }
3865
3866 for (size_t n = 0; n < qos->n_bandwidth; n++) {
3867 if (!strcmp(qos->key_bandwidth[n], "rate")) {
3868 rate = qos->value_bandwidth[n];
3869 } else if (!strcmp(qos->key_bandwidth[n], "burst")) {
3870 burst = qos->value_bandwidth[n];
3871 }
3872 }
3873 if (rate) {
3874 struct ds meter_action = DS_EMPTY_INITIALIZER;
3875 stage = ingress ? S_SWITCH_IN_QOS_METER : S_SWITCH_OUT_QOS_METER;
3876 if (burst) {
3877 ds_put_format(&meter_action,
3878 "set_meter(%"PRId64", %"PRId64"); next;",
3879 rate, burst);
3880 } else {
3881 ds_put_format(&meter_action,
3882 "set_meter(%"PRId64"); next;",
3883 rate);
3884 }
3885
3886 /* Ingress and Egress QoS Meter Table.
3887 *
3888 * We limit the bandwidth of this flow by adding a meter table.
3889 */
3890 ovn_lflow_add(lflows, od, stage,
3891 qos->priority,
3892 qos->match, ds_cstr(&meter_action));
3893 ds_destroy(&meter_action);
3894 }
3895 }
3896 }
3897
3898 static void
3899 build_lb(struct ovn_datapath *od, struct hmap *lflows)
3900 {
3901 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3902 * default. */
3903 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3904 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3905
3906 if (od->nbs->load_balancer) {
3907 /* Ingress and Egress LB Table (Priority 65535).
3908 *
3909 * Send established traffic through conntrack for just NAT. */
3910 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3911 "ct.est && !ct.rel && !ct.new && !ct.inv",
3912 REGBIT_CONNTRACK_NAT" = 1; next;");
3913 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3914 "ct.est && !ct.rel && !ct.new && !ct.inv",
3915 REGBIT_CONNTRACK_NAT" = 1; next;");
3916 }
3917 }
3918
3919 static void
3920 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3921 {
3922 /* Ingress and Egress stateful Table (Priority 0): Packets are
3923 * allowed by default. */
3924 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3925 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3926
3927 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3928 * committed to conntrack. We always set ct_label.blocked to 0 here as
3929 * any packet that makes it this far is part of a connection we
3930 * want to allow to continue. */
3931 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3932 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3933 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3934 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3935
3936 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3937 * through nat (without committing).
3938 *
3939 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3940 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3941 * don't overlap.
3942 */
3943 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3944 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3945 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3946 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3947
3948 /* Load balancing rules for new connections get committed to conntrack
3949 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3950 * a higher priority rule for load balancing below also commits the
3951 * connection, so it is okay if we do not hit the above match on
3952 * REGBIT_CONNTRACK_COMMIT. */
3953 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3954 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3955 struct smap *vips = &lb->vips;
3956 struct smap_node *node;
3957
3958 SMAP_FOR_EACH (node, vips) {
3959 uint16_t port = 0;
3960 int addr_family;
3961
3962 /* node->key contains IP:port or just IP. */
3963 char *ip_address = NULL;
3964 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3965 &addr_family);
3966 if (!ip_address) {
3967 continue;
3968 }
3969
3970 /* New connections in Ingress table. */
3971 char *action = xasprintf("ct_lb(%s);", node->value);
3972 struct ds match = DS_EMPTY_INITIALIZER;
3973 if (addr_family == AF_INET) {
3974 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
3975 } else {
3976 ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
3977 }
3978 if (port) {
3979 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
3980 ds_put_format(&match, " && udp.dst == %d", port);
3981 } else {
3982 ds_put_format(&match, " && tcp.dst == %d", port);
3983 }
3984 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3985 120, ds_cstr(&match), action);
3986 } else {
3987 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3988 110, ds_cstr(&match), action);
3989 }
3990
3991 free(ip_address);
3992 ds_destroy(&match);
3993 free(action);
3994 }
3995 }
3996 }
3997
/* Adds to 'lflows' the logical switch pipeline flows for every entry of
 * 'datapaths' that has 'nbs' set and for every entry of 'ports' that has
 * 'nbsp' set; all other entries are skipped.
 *
 * 'port_groups' is only handed on to build_acls().  Enabled switch ports are
 * added to the mc_flood multicast group in 'mcgroups', and enabled ports
 * with an "unknown" address are added to the mc_unknown group. */
3998 static void
3999 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
4000 struct hmap *port_groups, struct hmap *lflows,
4001 struct hmap *mcgroups)
4002 {
4003 /* This flow table structure is documented in ovn-northd(8), so please
4004 * update ovn-northd.8.xml if you change anything. */
4005
/* Scratch buffers: most flows below clear and refill these instead of
 * allocating their own; both are destroyed at the end of the function. */
4006 struct ds match = DS_EMPTY_INITIALIZER;
4007 struct ds actions = DS_EMPTY_INITIALIZER;
4008
4009 /* Build pre-ACL and ACL tables for both ingress and egress.
4010 * Ingress tables 3 through 10. Egress tables 0 through 7. */
4011 struct ovn_datapath *od;
4012 HMAP_FOR_EACH (od, key_node, datapaths) {
4013 if (!od->nbs) {
4014 continue;
4015 }
4016
4017 build_pre_acls(od, lflows);
4018 build_pre_lb(od, lflows);
4019 build_pre_stateful(od, lflows);
4020 build_acls(od, lflows, port_groups);
4021 build_qos(od, lflows);
4022 build_lb(od, lflows);
4023 build_stateful(od, lflows);
4024 }
4025
4026 /* Logical switch ingress table 0: Admission control framework (priority
4027 * 100). */
4028 HMAP_FOR_EACH (od, key_node, datapaths) {
4029 if (!od->nbs) {
4030 continue;
4031 }
4032
4033 /* Logical VLANs not supported. */
4034 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
4035 "drop;");
4036
4037 /* Broadcast/multicast source address is invalid. */
4038 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
4039 "drop;");
4040
4041 /* Port security flows have priority 50 (see below) and will continue
4042 * to the next table if packet source is acceptable. */
4043 }
4044
4045 /* Logical switch ingress table 0: Ingress port security - L2
4046 * (priority 50).
4047 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
4048 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
4049 */
4050 struct ovn_port *op;
4051 HMAP_FOR_EACH (op, key_node, ports) {
4052 if (!op->nbsp) {
4053 continue;
4054 }
4055
4056 if (!lsp_is_enabled(op->nbsp)) {
4057 /* Drop packets from disabled logical ports (since logical flow
4058 * tables are default-drop). */
4059 continue;
4060 }
4061
4062 ds_clear(&match);
4063 ds_clear(&actions);
4064 ds_put_format(&match, "inport == %s", op->json_key);
4065 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
4066 &match);
4067
/* A "qdisc_queue_id" option on the port's southbound record makes the
 * admission flow also select that queue. */
4068 const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
4069 if (queue_id) {
4070 ds_put_format(&actions, "set_queue(%s); ", queue_id);
4071 }
4072 ds_put_cstr(&actions, "next;");
4073 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
4074 ds_cstr(&match), ds_cstr(&actions));
4075
4076 if (op->nbsp->n_port_security) {
4077 build_port_security_ip(P_IN, op, lflows);
4078 build_port_security_nd(op, lflows);
4079 }
4080 }
4081
4082 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
4083 * (priority 0)*/
4084 HMAP_FOR_EACH (od, key_node, datapaths) {
4085 if (!od->nbs) {
4086 continue;
4087 }
4088
4089 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
4090 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
4091 }
4092
4093 /* Ingress table 11: ARP/ND responder, skip requests coming from localnet
4094 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
4095 * rationale. */
4096 HMAP_FOR_EACH (op, key_node, ports) {
4097 if (!op->nbsp) {
4098 continue;
4099 }
4100
4101 if ((!strcmp(op->nbsp->type, "localnet")) ||
4102 (!strcmp(op->nbsp->type, "vtep"))) {
4103 ds_clear(&match);
4104 ds_put_format(&match, "inport == %s", op->json_key);
4105 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
4106 ds_cstr(&match), "next;");
4107 }
4108 }
4109
4110 /* Ingress table 11: ARP/ND responder, reply for known IPs.
4111 * (priority 50). */
4112 HMAP_FOR_EACH (op, key_node, ports) {
4113 if (!op->nbsp) {
4114 continue;
4115 }
4116
4117 /*
4118 * Add ARP/ND reply flows if either the
4119 * - port is up or
4120 * - port type is router or
4121 * - port type is localport
4122 */
4123 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
4124 strcmp(op->nbsp->type, "localport")) {
4125 continue;
4126 }
4127
4128 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4129 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4130 ds_clear(&match);
4131 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
4132 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
4133 ds_clear(&actions);
4134 ds_put_format(&actions,
4135 "eth.dst = eth.src; "
4136 "eth.src = %s; "
4137 "arp.op = 2; /* ARP reply */ "
4138 "arp.tha = arp.sha; "
4139 "arp.sha = %s; "
4140 "arp.tpa = arp.spa; "
4141 "arp.spa = %s; "
4142 "outport = inport; "
4143 "flags.loopback = 1; "
4144 "output;",
4145 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
4146 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
4147 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
4148 ds_cstr(&match), ds_cstr(&actions));
4149
4150 /* Do not reply to an ARP request from the port that owns the
4151 * address (otherwise a DHCP client that ARPs to check for a
4152 * duplicate address will fail). Instead, forward it the usual
4153 * way.
4154 *
4155 * (Another alternative would be to simply drop the packet. If
4156 * everything is working as it is configured, then this would
4157 * produce equivalent results, since no one should reply to the
4158 * request. But ARPing for one's own IP address is intended to
4159 * detect situations where the network is not working as
4160 * configured, so dropping the request would frustrate that
4161 * intent.) */
/* 'match' still holds the priority-50 match; it is extended here, not
 * rebuilt. */
4162 ds_put_format(&match, " && inport == %s", op->json_key);
4163 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
4164 ds_cstr(&match), "next;");
4165 }
4166
4167 /* For ND solicitations, we need to listen for both the
4168 * unicast IPv6 address and its all-nodes multicast address,
4169 * but always respond with the unicast IPv6 address. */
4170 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4171 ds_clear(&match);
4172 ds_put_format(&match,
4173 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
4174 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
4175 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
4176 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
4177
4178 ds_clear(&actions);
4179 ds_put_format(&actions,
4180 "%s { "
4181 "eth.src = %s; "
4182 "ip6.src = %s; "
4183 "nd.target = %s; "
4184 "nd.tll = %s; "
4185 "outport = inport; "
4186 "flags.loopback = 1; "
4187 "output; "
4188 "};",
4189 !strcmp(op->nbsp->type, "router") ?
4190 "nd_na_router" : "nd_na",
4191 op->lsp_addrs[i].ea_s,
4192 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
4193 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
4194 op->lsp_addrs[i].ea_s);
4195 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
4196 ds_cstr(&match), ds_cstr(&actions));
4197
4198 /* Do not reply to a solicitation from the port that owns the
4199 * address (otherwise DAD detection will fail). */
4200 ds_put_format(&match, " && inport == %s", op->json_key);
4201 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
4202 ds_cstr(&match), "next;");
4203 }
4204 }
4205 }
4206
4207 /* Ingress table 11: ARP/ND responder, by default goto next.
4208 * (priority 0)*/
4209 HMAP_FOR_EACH (od, key_node, datapaths) {
4210 if (!od->nbs) {
4211 continue;
4212 }
4213
4214 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
4215 }
4216
4217 /* Logical switch ingress table 12 and 13: DHCP options and response
4218 * priority 100 flows. */
4219 HMAP_FOR_EACH (op, key_node, ports) {
4220 if (!op->nbsp) {
4221 continue;
4222 }
4223
4224 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
4225 /* Don't add the DHCP flows if the port is not enabled or if the
4226 * port is a router port. */
4227 continue;
4228 }
4229
4230 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
4231 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
4232 */
4233 continue;
4234 }
4235
4236 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4237 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4238 struct ds options_action = DS_EMPTY_INITIALIZER;
4239 struct ds response_action = DS_EMPTY_INITIALIZER;
4240 struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
/* NOTE(review): the three buffers above are destroyed only when
 * build_dhcpv4_action() returns true; presumably it leaves them empty
 * when it returns false - confirm. */
4241 if (build_dhcpv4_action(
4242 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
4243 &options_action, &response_action, &ipv4_addr_match)) {
4244 ds_clear(&match);
4245 ds_put_format(
4246 &match, "inport == %s && eth.src == %s && "
4247 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
4248 "udp.src == 68 && udp.dst == 67", op->json_key,
4249 op->lsp_addrs[i].ea_s);
4250
4251 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
4252 100, ds_cstr(&match),
4253 ds_cstr(&options_action));
4254 ds_clear(&match);
4255 /* Allow ip4.src = OFFER_IP and
4256 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
4257 * cases
4258 * - When the client wants to renew the IP by sending
4259 * the DHCPREQUEST to the server ip.
4260 * - When the client wants to renew the IP by
4261 * broadcasting the DHCPREQUEST.
4262 */
4263 ds_put_format(
4264 &match, "inport == %s && eth.src == %s && "
4265 "%s && udp.src == 68 && udp.dst == 67", op->json_key,
4266 op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));
4267
4268 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
4269 100, ds_cstr(&match),
4270 ds_cstr(&options_action));
4271 ds_clear(&match);
4272
4273 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
4274 * put_dhcp_opts action is successful. */
4275 ds_put_format(
4276 &match, "inport == %s && eth.src == %s && "
4277 "ip4 && udp.src == 68 && udp.dst == 67"
4278 " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
4279 op->lsp_addrs[i].ea_s);
4280 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
4281 100, ds_cstr(&match),
4282 ds_cstr(&response_action));
4283 ds_destroy(&options_action);
4284 ds_destroy(&response_action);
4285 ds_destroy(&ipv4_addr_match);
/* Within address set 'i', only the first IPv4 address for which
 * build_dhcpv4_action() succeeds gets DHCP flows. */
4286 break;
4287 }
4288 }
4289
4290 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4291 struct ds options_action = DS_EMPTY_INITIALIZER;
4292 struct ds response_action = DS_EMPTY_INITIALIZER;
4293 if (build_dhcpv6_action(
4294 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
4295 &options_action, &response_action)) {
4296 ds_clear(&match);
4297 ds_put_format(
4298 &match, "inport == %s && eth.src == %s"
4299 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
4300 " udp.dst == 547", op->json_key,
4301 op->lsp_addrs[i].ea_s);
4302
4303 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
4304 ds_cstr(&match), ds_cstr(&options_action));
4305
4306 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
4307 * put_dhcpv6_opts action is successful */
4308 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
4309 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
4310 ds_cstr(&match), ds_cstr(&response_action));
4311 ds_destroy(&options_action);
4312 ds_destroy(&response_action);
4313 break;
4314 }
4315 }
4316 }
4317 }
4318
4319 /* Logical switch ingress table 14 and 15: DNS lookup and response
4320 * priority 100 flows.
4321 */
4322 HMAP_FOR_EACH (od, key_node, datapaths) {
4323 if (!od->nbs || !ls_has_dns_records(od->nbs)) {
4324 continue;
4325 }
4326
4327 struct ds action = DS_EMPTY_INITIALIZER;
4328
4329 ds_clear(&match);
4330 ds_put_cstr(&match, "udp.dst == 53");
4331 ds_put_format(&action,
4332 REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
4333 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
4334 ds_cstr(&match), ds_cstr(&action));
4335 ds_clear(&action);
/* NOTE(review): the two response flows below share one match string and
 * differ only in their actions (ip4 vs. ip6 address swap); presumably the
 * fields used by each action narrow it to its address family - confirm. */
4336 ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
4337 ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
4338 "udp.dst = udp.src; udp.src = 53; outport = inport; "
4339 "flags.loopback = 1; output;");
4340 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
4341 ds_cstr(&match), ds_cstr(&action));
4342 ds_clear(&action);
4343 ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
4344 "udp.dst = udp.src; udp.src = 53; outport = inport; "
4345 "flags.loopback = 1; output;");
4346 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
4347 ds_cstr(&match), ds_cstr(&action));
4348 ds_destroy(&action);
4349 }
4350
4351 /* Ingress table 12 and 13: DHCP options and response, by default goto
4352 * next. (priority 0).
4353 * Ingress table 14 and 15: DNS lookup and response, by default goto next.
4354 * (priority 0).*/
4355
4356 HMAP_FOR_EACH (od, key_node, datapaths) {
4357 if (!od->nbs) {
4358 continue;
4359 }
4360
4361 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
4362 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
4363 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
4364 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
4365 }
4366
4367 /* Ingress table 16: Destination lookup, broadcast and multicast handling
4368 * (priority 100). */
4369 HMAP_FOR_EACH (op, key_node, ports) {
4370 if (!op->nbsp) {
4371 continue;
4372 }
4373
4374 if (lsp_is_enabled(op->nbsp)) {
4375 ovn_multicast_add(mcgroups, &mc_flood, op);
4376 }
4377 }
4378 HMAP_FOR_EACH (od, key_node, datapaths) {
4379 if (!od->nbs) {
4380 continue;
4381 }
4382
4383 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
4384 "outport = \""MC_FLOOD"\"; output;");
4385 }
4386
4387 /* Ingress table 16: Destination lookup, unicast handling (priority 50). */
4388 HMAP_FOR_EACH (op, key_node, ports) {
4389 if (!op->nbsp) {
4390 continue;
4391 }
4392
4393 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
4394 /* Addresses are owned by the logical port.
4395 * Ethernet address followed by zero or more IPv4
4396 * or IPv6 addresses (or both). */
4397 struct eth_addr mac;
4398 if (ovs_scan(op->nbsp->addresses[i],
4399 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4400 ds_clear(&match);
4401 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4402 ETH_ADDR_ARGS(mac));
4403
4404 ds_clear(&actions);
4405 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4406 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4407 ds_cstr(&match), ds_cstr(&actions));
4408 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
4409 if (lsp_is_enabled(op->nbsp)) {
4410 ovn_multicast_add(mcgroups, &mc_unknown, op);
/* Read back further down to decide whether the datapath gets the
 * priority-0 MC_UNKNOWN flow. */
4411 op->od->has_unknown = true;
4412 }
4413 } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
4414 if (!op->nbsp->dynamic_addresses
4415 || !ovs_scan(op->nbsp->dynamic_addresses,
4416 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4417 continue;
4418 }
4419 ds_clear(&match);
4420 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4421 ETH_ADDR_ARGS(mac));
4422
4423 ds_clear(&actions);
4424 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4425 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4426 ds_cstr(&match), ds_cstr(&actions));
4427 } else if (!strcmp(op->nbsp->addresses[i], "router")) {
/* "router": the MAC is taken from the peer logical router port. */
4428 if (!op->peer || !op->peer->nbrp
4429 || !ovs_scan(op->peer->nbrp->mac,
4430 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
4431 continue;
4432 }
4433 ds_clear(&match);
4434 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
4435 ETH_ADDR_ARGS(mac));
4436 if (op->peer->od->l3dgw_port
4437 && op->peer == op->peer->od->l3dgw_port
4438 && op->peer->od->l3redirect_port) {
4439 /* The destination lookup flow for the router's
4440 * distributed gateway port MAC address should only be
4441 * programmed on the "redirect-chassis". */
4442 ds_put_format(&match, " && is_chassis_resident(%s)",
4443 op->peer->od->l3redirect_port->json_key);
4444 }
4445
4446 ds_clear(&actions);
4447 ds_put_format(&actions, "outport = %s; output;", op->json_key);
4448 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
4449 ds_cstr(&match), ds_cstr(&actions));
4450
4451 /* Add ethernet addresses specified in NAT rules on
4452 * distributed logical routers. */
4453 if (op->peer->od->l3dgw_port
4454 && op->peer == op->peer->od->l3dgw_port) {
4455 for (int j = 0; j < op->peer->od->nbr->n_nat; j++) {
4456 const struct nbrec_nat *nat
4457 = op->peer->od->nbr->nat[j];
4458 if (!strcmp(nat->type, "dnat_and_snat")
4459 && nat->logical_port && nat->external_mac
4460 && eth_addr_from_string(nat->external_mac, &mac)) {
4461
4462 ds_clear(&match);
4463 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
4464 " && is_chassis_resident(\"%s\")",
4465 ETH_ADDR_ARGS(mac),
4466 nat->logical_port);
4467
4468 ds_clear(&actions);
4469 ds_put_format(&actions, "outport = %s; output;",
4470 op->json_key);
4471 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
4472 50, ds_cstr(&match),
4473 ds_cstr(&actions));
4474 }
4475 }
4476 }
4477 } else {
4478 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4479
4480 VLOG_INFO_RL(&rl,
4481 "%s: invalid syntax '%s' in addresses column",
4482 op->nbsp->name, op->nbsp->addresses[i]);
4483 }
4484 }
4485 }
4486
4487 /* Ingress table 16: Destination lookup for unknown MACs (priority 0). */
4488 HMAP_FOR_EACH (od, key_node, datapaths) {
4489 if (!od->nbs) {
4490 continue;
4491 }
4492
4493 if (od->has_unknown) {
4494 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
4495 "outport = \""MC_UNKNOWN"\"; output;");
4496 }
4497 }
4498
4499 /* Egress tables 8: Egress port security - IP (priority 0)
4500 * Egress table 9: Egress port security L2 - multicast/broadcast
4501 * (priority 100). */
4502 HMAP_FOR_EACH (od, key_node, datapaths) {
4503 if (!od->nbs) {
4504 continue;
4505 }
4506
4507 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
4508 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
4509 "output;");
4510 }
4511
4512 /* Egress table 8: Egress port security - IP (priorities 90 and 80)
4513 * if port security enabled.
4514 *
4515 * Egress table 9: Egress port security - L2 (priorities 50 and 150).
4516 *
4517 * Priority 50 rules implement port security for enabled logical port.
4518 *
4519 * Priority 150 rules drop packets to disabled logical ports, so that they
4520 * don't even receive multicast or broadcast packets. */
4521 HMAP_FOR_EACH (op, key_node, ports) {
4522 if (!op->nbsp) {
4523 continue;
4524 }
4525
4526 ds_clear(&match);
4527 ds_put_format(&match, "outport == %s", op->json_key);
4528 if (lsp_is_enabled(op->nbsp)) {
4529 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
4530 &match);
4531 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
4532 ds_cstr(&match), "output;");
4533 } else {
4534 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
4535 ds_cstr(&match), "drop;");
4536 }
4537
4538 if (op->nbsp->n_port_security) {
4539 build_port_security_ip(P_OUT, op, lflows);
4540 }
4541 }
4542
4543 ds_destroy(&match);
4544 ds_destroy(&actions);
4545 }
4546
4547 static bool
4548 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
4549 {
4550 return !lrport->enabled || *lrport->enabled;
4551 }
4552
4553 /* Returns a string of the IP address of the router port 'op' that
4554 * overlaps with 'ip_s". If one is not found, returns NULL.
4555 *
4556 * The caller must not free the returned string. */
4557 static const char *
4558 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
4559 {
4560 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4561
4562 if (is_ipv4) {
4563 ovs_be32 ip;
4564
4565 if (!ip_parse(ip_s, &ip)) {
4566 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4567 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
4568 return NULL;
4569 }
4570
4571 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4572 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
4573
4574 if (!((na->network ^ ip) & na->mask)) {
4575 /* There should be only 1 interface that matches the
4576 * supplied IP. Otherwise, it's a configuration error,
4577 * because subnets of a router's interfaces should NOT
4578 * overlap. */
4579 return na->addr_s;
4580 }
4581 }
4582 } else {
4583 struct in6_addr ip6;
4584
4585 if (!ipv6_parse(ip_s, &ip6)) {
4586 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4587 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
4588 return NULL;
4589 }
4590
4591 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4592 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
4593 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
4594 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
4595
4596 if (ipv6_is_zero(&and_addr)) {
4597 /* There should be only 1 interface that matches the
4598 * supplied IP. Otherwise, it's a configuration error,
4599 * because subnets of a router's interfaces should NOT
4600 * overlap. */
4601 return na->addr_s;
4602 }
4603 }
4604 }
4605
4606 return NULL;
4607 }
4608
4609 static void
4610 add_route(struct hmap *lflows, const struct ovn_port *op,
4611 const char *lrp_addr_s, const char *network_s, int plen,
4612 const char *gateway, const char *policy)
4613 {
4614 bool is_ipv4 = strchr(network_s, '.') ? true : false;
4615 struct ds match = DS_EMPTY_INITIALIZER;
4616 const char *dir;
4617 uint16_t priority;
4618
4619 if (policy && !strcmp(policy, "src-ip")) {
4620 dir = "src";
4621 priority = plen * 2;
4622 } else {
4623 dir = "dst";
4624 priority = (plen * 2) + 1;
4625 }
4626
4627 /* IPv6 link-local addresses must be scoped to the local router port. */
4628 if (!is_ipv4) {
4629 struct in6_addr network;
4630 ovs_assert(ipv6_parse(network_s, &network));
4631 if (in6_is_lla(&network)) {
4632 ds_put_format(&match, "inport == %s && ", op->json_key);
4633 }
4634 }
4635 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
4636 network_s, plen);
4637
4638 struct ds actions = DS_EMPTY_INITIALIZER;
4639 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
4640
4641 if (gateway) {
4642 ds_put_cstr(&actions, gateway);
4643 } else {
4644 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
4645 }
4646 ds_put_format(&actions, "; "
4647 "%sreg1 = %s; "
4648 "eth.src = %s; "
4649 "outport = %s; "
4650 "flags.loopback = 1; "
4651 "next;",
4652 is_ipv4 ? "" : "xx",
4653 lrp_addr_s,
4654 op->lrp_networks.ea_s,
4655 op->json_key);
4656
4657 /* The priority here is calculated to implement longest-prefix-match
4658 * routing. */
4659 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
4660 ds_cstr(&match), ds_cstr(&actions));
4661 ds_destroy(&match);
4662 ds_destroy(&actions);
4663 }
4664
4665 static void
4666 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
4667 struct hmap *ports,
4668 const struct nbrec_logical_router_static_route *route)
4669 {
4670 ovs_be32 nexthop;
4671 const char *lrp_addr_s = NULL;
4672 unsigned int plen;
4673 bool is_ipv4;
4674
4675 /* Verify that the next hop is an IP address with an all-ones mask. */
4676 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
4677 if (!error) {
4678 if (plen != 32) {
4679 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4680 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4681 return;
4682 }
4683 is_ipv4 = true;
4684 } else {
4685 free(error);
4686
4687 struct in6_addr ip6;
4688 error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
4689 if (!error) {
4690 if (plen != 128) {
4691 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4692 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
4693 return;
4694 }
4695 is_ipv4 = false;
4696 } else {
4697 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4698 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
4699 free(error);
4700 return;
4701 }
4702 }
4703
4704 char *prefix_s;
4705 if (is_ipv4) {
4706 ovs_be32 prefix;
4707 /* Verify that ip prefix is a valid IPv4 address. */
4708 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
4709 if (error) {
4710 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4711 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4712 route->ip_prefix);
4713 free(error);
4714 return;
4715 }
4716 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
4717 } else {
4718 /* Verify that ip prefix is a valid IPv6 address. */
4719 struct in6_addr prefix;
4720 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
4721 if (error) {
4722 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4723 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
4724 route->ip_prefix);
4725 free(error);
4726 return;
4727 }
4728 struct in6_addr mask = ipv6_create_mask(plen);
4729 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
4730 prefix_s = xmalloc(INET6_ADDRSTRLEN);
4731 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
4732 }
4733
4734 /* Find the outgoing port. */
4735 struct ovn_port *out_port = NULL;
4736 if (route->output_port) {
4737 out_port = ovn_port_find(ports, route->output_port);
4738 if (!out_port) {
4739 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4740 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
4741 route->output_port, route->ip_prefix);
4742 goto free_prefix_s;
4743 }
4744 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4745 if (!lrp_addr_s) {
4746 /* There are no IP networks configured on the router's port via
4747 * which 'route->nexthop' is theoretically reachable. But since
4748 * 'out_port' has been specified, we honor it by trying to reach
4749 * 'route->nexthop' via the first IP address of 'out_port'.
4750 * (There are cases, e.g in GCE, where each VM gets a /32 IP
4751 * address and the default gateway is still reachable from it.) */
4752 if (is_ipv4) {
4753 if (out_port->lrp_networks.n_ipv4_addrs) {
4754 lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
4755 }
4756 } else {
4757 if (out_port->lrp_networks.n_ipv6_addrs) {
4758 lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
4759 }
4760 }
4761 }
4762 } else {
4763 /* output_port is not specified, find the
4764 * router port matching the next hop. */
4765 int i;
4766 for (i = 0; i < od->nbr->n_ports; i++) {
4767 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
4768 out_port = ovn_port_find(ports, lrp->name);
4769 if (!out_port) {
4770 /* This should not happen. */
4771 continue;
4772 }
4773
4774 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
4775 if (lrp_addr_s) {
4776 break;
4777 }
4778 }
4779 }
4780
4781 if (!out_port || !lrp_addr_s) {
4782 /* There is no matched out port. */
4783 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4784 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
4785 route->ip_prefix, route->nexthop);
4786 goto free_prefix_s;
4787 }
4788
4789 char *policy = route->policy ? route->policy : "dst-ip";
4790 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
4791 policy);
4792
4793 free_prefix_s:
4794 free(prefix_s);
4795 }
4796
4797 static void
4798 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4799 {
4800 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
4801 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
4802 return;
4803 }
4804
4805 ds_put_cstr(ds, "{");
4806 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4807 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
4808 if (add_bcast) {
4809 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4810 }
4811 }
4812 ds_chomp(ds, ' ');
4813 ds_chomp(ds, ',');
4814 ds_put_cstr(ds, "}");
4815 }
4816
4817 static void
4818 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4819 {
4820 if (op->lrp_networks.n_ipv6_addrs == 1) {
4821 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4822 return;
4823 }
4824
4825 ds_put_cstr(ds, "{");
4826 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4827 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4828 }
4829 ds_chomp(ds, ' ');
4830 ds_chomp(ds, ',');
4831 ds_put_cstr(ds, "}");
4832 }
4833
4834 static const char *
4835 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
4836 {
4837 char *key = xasprintf("%s_force_snat_ip", key_type);
4838 const char *ip_address = smap_get(&od->nbr->options, key);
4839 free(key);
4840
4841 if (ip_address) {
4842 ovs_be32 mask;
4843 char *error = ip_parse_masked(ip_address, ip, &mask);
4844 if (error || mask != OVS_BE32_MAX) {
4845 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4846 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
4847 ip_address, UUID_ARGS(&od->key));
4848 free(error);
4849 *ip = 0;
4850 return NULL;
4851 }
4852 return ip_address;
4853 }
4854
4855 *ip = 0;
4856 return NULL;
4857 }
4858
4859 static void
4860 add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
4861 struct ds *match, struct ds *actions, int priority,
4862 const char *lb_force_snat_ip, char *backend_ips,
4863 bool is_udp, int addr_family)
4864 {
4865 /* A match and actions for new connections. */
4866 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
4867 if (lb_force_snat_ip) {
4868 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
4869 ds_cstr(actions));
4870 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4871 new_actions);
4872 free(new_actions);
4873 } else {
4874 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4875 ds_cstr(actions));
4876 }
4877
4878 /* A match and actions for established connections. */
4879 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
4880 if (lb_force_snat_ip) {
4881 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4882 "flags.force_snat_for_lb = 1; ct_dnat;");
4883 } else {
4884 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4885 "ct_dnat;");
4886 }
4887
4888 free(new_match);
4889 free(est_match);
4890
4891 if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips) {
4892 return;
4893 }
4894
4895 /* Add logical flows to UNDNAT the load balanced reverse traffic in
4896 * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical
4897 * router has a gateway router port associated.
4898 */
4899 struct ds undnat_match = DS_EMPTY_INITIALIZER;
4900 if (addr_family == AF_INET) {
4901 ds_put_cstr(&undnat_match, "ip4 && (");
4902 } else {
4903 ds_put_cstr(&undnat_match, "ip6 && (");
4904 }
4905 char *start, *next, *ip_str;
4906 start = next = xstrdup(backend_ips);
4907 ip_str = strsep(&next, ",");
4908 bool backend_ips_found = false;
4909 while (ip_str && ip_str[0]) {
4910 char *ip_address = NULL;
4911 uint16_t port = 0;
4912 int addr_family_;
4913 ip_address_and_port_from_lb_key(ip_str, &ip_address, &port,
4914 &addr_family_);
4915 if (!ip_address) {
4916 break;
4917 }
4918
4919 if (addr_family_ == AF_INET) {
4920 ds_put_format(&undnat_match, "(ip4.src == %s", ip_address);
4921 } else {
4922 ds_put_format(&undnat_match, "(ip6.src == %s", ip_address);
4923 }
4924 free(ip_address);
4925 if (port) {
4926 ds_put_format(&undnat_match, " && %s.src == %d) || ",
4927 is_udp ? "udp" : "tcp", port);
4928 } else {
4929 ds_put_cstr(&undnat_match, ") || ");
4930 }
4931 ip_str = strsep(&next, ",");
4932 backend_ips_found = true;
4933 }
4934
4935 free(start);
4936 if (!backend_ips_found) {
4937 ds_destroy(&undnat_match);
4938 return;
4939 }
4940 ds_chomp(&undnat_match, ' ');
4941 ds_chomp(&undnat_match, '|');
4942 ds_chomp(&undnat_match, '|');
4943 ds_chomp(&undnat_match, ' ');
4944 ds_put_format(&undnat_match, ") && outport == %s && "
4945 "is_chassis_resident(%s)", od->l3dgw_port->json_key,
4946 od->l3redirect_port->json_key);
4947 if (lb_force_snat_ip) {
4948 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
4949 ds_cstr(&undnat_match),
4950 "flags.force_snat_for_lb = 1; ct_dnat;");
4951 } else {
4952 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
4953 ds_cstr(&undnat_match), "ct_dnat;");
4954 }
4955
4956 ds_destroy(&undnat_match);
4957 }
4958
/* Range to which copy_ra_to_sb() clamps the "max_interval" value taken from a
 * logical router port's ipv6_ra_configs.  (Presumably seconds, following the
 * MaxRtrAdvInterval limits of RFC 4861 -- confirm.) */
#define ND_RA_MAX_INTERVAL_MIN 4
#define ND_RA_MAX_INTERVAL_MAX 1800

/* Range to which copy_ra_to_sb() clamps the "min_interval" value.  The upper
 * bound is three quarters of the given maximum interval, computed with
 * integer arithmetic (so it rounds toward zero for positive inputs). */
#define ND_RA_MIN_INTERVAL_MIN 3
#define ND_RA_MIN_INTERVAL_MAX(max_interval) ((max_interval) * 3 / 4)
4964
4965 static void
4966 copy_ra_to_sb(struct ovn_port *op, const char *address_mode)
4967 {
4968 struct smap options;
4969 smap_clone(&options, &op->sb->options);
4970
4971 smap_add(&options, "ipv6_ra_send_periodic", "true");
4972 smap_add(&options, "ipv6_ra_address_mode", address_mode);
4973
4974 int max_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
4975 "max_interval", ND_RA_MAX_INTERVAL_DEFAULT);
4976 if (max_interval > ND_RA_MAX_INTERVAL_MAX) {
4977 max_interval = ND_RA_MAX_INTERVAL_MAX;
4978 }
4979 if (max_interval < ND_RA_MAX_INTERVAL_MIN) {
4980 max_interval = ND_RA_MAX_INTERVAL_MIN;
4981 }
4982 smap_add_format(&options, "ipv6_ra_max_interval", "%d", max_interval);
4983
4984 int min_interval = smap_get_int(&op->nbrp->ipv6_ra_configs,
4985 "min_interval", nd_ra_min_interval_default(max_interval));
4986 if (min_interval > ND_RA_MIN_INTERVAL_MAX(max_interval)) {
4987 min_interval = ND_RA_MIN_INTERVAL_MAX(max_interval);
4988 }
4989 if (min_interval < ND_RA_MIN_INTERVAL_MIN) {
4990 min_interval = ND_RA_MIN_INTERVAL_MIN;
4991 }
4992 smap_add_format(&options, "ipv6_ra_min_interval", "%d", min_interval);
4993
4994 int mtu = smap_get_int(&op->nbrp->ipv6_ra_configs, "mtu", ND_MTU_DEFAULT);
4995 /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */
4996 if (mtu && mtu >= 1280) {
4997 smap_add_format(&options, "ipv6_ra_mtu", "%d", mtu);
4998 }
4999
5000 struct ds s = DS_EMPTY_INITIALIZER;
5001 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; ++i) {
5002 struct ipv6_netaddr *addrs = &op->lrp_networks.ipv6_addrs[i];
5003 if (in6_is_lla(&addrs->network)) {
5004 smap_add(&options, "ipv6_ra_src_addr", addrs->addr_s);
5005 continue;
5006 }
5007 ds_put_format(&s, "%s/%u ", addrs->network_s, addrs->plen);
5008 }
5009 /* Remove trailing space */
5010 ds_chomp(&s, ' ');
5011 smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
5012 ds_destroy(&s);
5013
5014 smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
5015
5016 sbrec_port_binding_set_options(op->sb, &options);
5017 smap_destroy(&options);
5018 }
5019
5020 static void
5021 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
5022 struct hmap *lflows)
5023 {
5024 /* This flow table structure is documented in ovn-northd(8), so please
5025 * update ovn-northd.8.xml if you change anything. */
5026
5027 struct ds match = DS_EMPTY_INITIALIZER;
5028 struct ds actions = DS_EMPTY_INITIALIZER;
5029
5030 /* Logical router ingress table 0: Admission control framework. */
5031 struct ovn_datapath *od;
5032 HMAP_FOR_EACH (od, key_node, datapaths) {
5033 if (!od->nbr) {
5034 continue;
5035 }
5036
5037 /* Logical VLANs not supported.
5038 * Broadcast/multicast source address is invalid. */
5039 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
5040 "vlan.present || eth.src[40]", "drop;");
5041 }
5042
5043 /* Logical router ingress table 0: match (priority 50). */
5044 struct ovn_port *op;
5045 HMAP_FOR_EACH (op, key_node, ports) {
5046 if (!op->nbrp) {
5047 continue;
5048 }
5049
5050 if (!lrport_is_enabled(op->nbrp)) {
5051 /* Drop packets from disabled logical ports (since logical flow
5052 * tables are default-drop). */
5053 continue;
5054 }
5055
5056 if (op->derived) {
5057 /* No ingress packets should be received on a chassisredirect
5058 * port. */
5059 continue;
5060 }
5061
5062 ds_clear(&match);
5063 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
5064 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
5065 ds_cstr(&match), "next;");
5066
5067 ds_clear(&match);
5068 ds_put_format(&match, "eth.dst == %s && inport == %s",
5069 op->lrp_networks.ea_s, op->json_key);
5070 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5071 && op->od->l3redirect_port) {
5072 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
5073 * should only be received on the "redirect-chassis". */
5074 ds_put_format(&match, " && is_chassis_resident(%s)",
5075 op->od->l3redirect_port->json_key);
5076 }
5077 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
5078 ds_cstr(&match), "next;");
5079 }
5080
5081 /* Logical router ingress table 1: IP Input. */
5082 HMAP_FOR_EACH (od, key_node, datapaths) {
5083 if (!od->nbr) {
5084 continue;
5085 }
5086
5087 /* L3 admission control: drop multicast and broadcast source, localhost
5088 * source or destination, and zero network source or destination
5089 * (priority 100). */
5090 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
5091 "ip4.mcast || "
5092 "ip4.src == 255.255.255.255 || "
5093 "ip4.src == 127.0.0.0/8 || "
5094 "ip4.dst == 127.0.0.0/8 || "
5095 "ip4.src == 0.0.0.0/8 || "
5096 "ip4.dst == 0.0.0.0/8",
5097 "drop;");
5098
5099 /* ARP reply handling. Use ARP replies to populate the logical
5100 * router's ARP table. */
5101 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
5102 "put_arp(inport, arp.spa, arp.sha);");
5103
5104 /* Drop Ethernet local broadcast. By definition this traffic should
5105 * not be forwarded.*/
5106 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
5107 "eth.bcast", "drop;");
5108
5109 /* TTL discard */
5110 ds_clear(&match);
5111 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
5112 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
5113 ds_cstr(&match), "drop;");
5114
5115 /* ND advertisement handling. Use advertisements to populate
5116 * the logical router's ARP/ND table. */
5117 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
5118 "put_nd(inport, nd.target, nd.tll);");
5119
        /* Learn from neighbor solicitations that were not directed at
         * us.  (A priority-90 flow will respond to requests to us and
         * learn the sender's MAC address.) */
5123 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
5124 "put_nd(inport, ip6.src, nd.sll);");
5125
5126 /* Pass other traffic not already handled to the next table for
5127 * routing. */
5128 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
5129 }
5130
5131 /* Logical router ingress table 1: IP Input for IPv4. */
5132 HMAP_FOR_EACH (op, key_node, ports) {
5133 if (!op->nbrp) {
5134 continue;
5135 }
5136
5137 if (op->derived) {
5138 /* No ingress packets are accepted on a chassisredirect
5139 * port, so no need to program flows for that port. */
5140 continue;
5141 }
5142
5143 if (op->lrp_networks.n_ipv4_addrs) {
5144 /* L3 admission control: drop packets that originate from an
5145 * IPv4 address owned by the router or a broadcast address
5146 * known to the router (priority 100). */
5147 ds_clear(&match);
5148 ds_put_cstr(&match, "ip4.src == ");
5149 op_put_v4_networks(&match, op, true);
5150 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
5151 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5152 ds_cstr(&match), "drop;");
5153
5154 /* ICMP echo reply. These flows reply to ICMP echo requests
5155 * received for the router's IP address. Since packets only
5156 * get here as part of the logical router datapath, the inport
5157 * (i.e. the incoming locally attached net) does not matter.
5158 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
5159 ds_clear(&match);
5160 ds_put_cstr(&match, "ip4.dst == ");
5161 op_put_v4_networks(&match, op, false);
5162 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
5163
5164 ds_clear(&actions);
5165 ds_put_format(&actions,
5166 "ip4.dst <-> ip4.src; "
5167 "ip.ttl = 255; "
5168 "icmp4.type = 0; "
5169 "flags.loopback = 1; "
5170 "next; ");
5171 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5172 ds_cstr(&match), ds_cstr(&actions));
5173 }
5174
5175 /* ICMP time exceeded */
5176 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5177 ds_clear(&match);
5178 ds_clear(&actions);
5179
5180 ds_put_format(&match,
5181 "inport == %s && ip4 && "
5182 "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
5183 ds_put_format(&actions,
5184 "icmp4 {"
5185 "eth.dst <-> eth.src; "
5186 "icmp4.type = 11; /* Time exceeded */ "
5187 "icmp4.code = 0; /* TTL exceeded in transit */ "
5188 "ip4.dst = ip4.src; "
5189 "ip4.src = %s; "
5190 "ip.ttl = 255; "
5191 "next; };",
5192 op->lrp_networks.ipv4_addrs[i].addr_s);
5193 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5194 ds_cstr(&match), ds_cstr(&actions));
5195 }
5196
5197 /* ARP reply. These flows reply to ARP requests for the router's own
5198 * IP address. */
5199 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5200 ds_clear(&match);
5201 ds_put_format(&match,
5202 "inport == %s && arp.spa == %s/%u && arp.tpa == %s"
5203 " && arp.op == 1",
5204 op->json_key,
5205 op->lrp_networks.ipv4_addrs[i].network_s,
5206 op->lrp_networks.ipv4_addrs[i].plen,
5207 op->lrp_networks.ipv4_addrs[i].addr_s);
5208 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5209 && op->od->l3redirect_port) {
5210 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5211 * should only be sent from the "redirect-chassis", so that
5212 * upstream MAC learning points to the "redirect-chassis".
5213 * Also need to avoid generation of multiple ARP responses
5214 * from different chassis. */
5215 ds_put_format(&match, " && is_chassis_resident(%s)",
5216 op->od->l3redirect_port->json_key);
5217 }
5218
5219 ds_clear(&actions);
5220 ds_put_format(&actions,
5221 "put_arp(inport, arp.spa, arp.sha); "
5222 "eth.dst = eth.src; "
5223 "eth.src = %s; "
5224 "arp.op = 2; /* ARP reply */ "
5225 "arp.tha = arp.sha; "
5226 "arp.sha = %s; "
5227 "arp.tpa = arp.spa; "
5228 "arp.spa = %s; "
5229 "outport = %s; "
5230 "flags.loopback = 1; "
5231 "output;",
5232 op->lrp_networks.ea_s,
5233 op->lrp_networks.ea_s,
5234 op->lrp_networks.ipv4_addrs[i].addr_s,
5235 op->json_key);
5236 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5237 ds_cstr(&match), ds_cstr(&actions));
5238 }
5239
5240 /* Learn from ARP requests that were not directed at us. A typical
5241 * use case is GARP request handling. (A priority-90 flow will
5242 * respond to request to us and learn the sender's mac address.) */
5243 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5244 ds_clear(&match);
5245 ds_put_format(&match,
5246 "inport == %s && arp.spa == %s/%u && arp.op == 1",
5247 op->json_key,
5248 op->lrp_networks.ipv4_addrs[i].network_s,
5249 op->lrp_networks.ipv4_addrs[i].plen);
5250 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5251 && op->od->l3redirect_port) {
5252 ds_put_format(&match, " && is_chassis_resident(%s)",
5253 op->od->l3redirect_port->json_key);
5254 }
5255 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5256 ds_cstr(&match),
5257 "put_arp(inport, arp.spa, arp.sha);");
5258
5259 }
5260
5261 /* A set to hold all load-balancer vips that need ARP responses. */
5262 struct sset all_ips = SSET_INITIALIZER(&all_ips);
5263 int addr_family;
5264 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
5265
5266 const char *ip_address;
5267 SSET_FOR_EACH(ip_address, &all_ips) {
5268 ds_clear(&match);
5269 if (addr_family == AF_INET) {
5270 ds_put_format(&match,
5271 "inport == %s && arp.tpa == %s && arp.op == 1",
5272 op->json_key, ip_address);
5273 } else {
5274 ds_put_format(&match,
5275 "inport == %s && nd_ns && nd.target == %s",
5276 op->json_key, ip_address);
5277 }
5278
5279 ds_clear(&actions);
5280 if (addr_family == AF_INET) {
5281 ds_put_format(&actions,
5282 "eth.dst = eth.src; "
5283 "eth.src = %s; "
5284 "arp.op = 2; /* ARP reply */ "
5285 "arp.tha = arp.sha; "
5286 "arp.sha = %s; "
5287 "arp.tpa = arp.spa; "
5288 "arp.spa = %s; "
5289 "outport = %s; "
5290 "flags.loopback = 1; "
5291 "output;",
5292 op->lrp_networks.ea_s,
5293 op->lrp_networks.ea_s,
5294 ip_address,
5295 op->json_key);
5296 } else {
5297 ds_put_format(&actions,
5298 "nd_na { "
5299 "eth.src = %s; "
5300 "ip6.src = %s; "
5301 "nd.target = %s; "
5302 "nd.tll = %s; "
5303 "outport = inport; "
5304 "flags.loopback = 1; "
5305 "output; "
5306 "};",
5307 op->lrp_networks.ea_s,
5308 ip_address,
5309 ip_address,
5310 op->lrp_networks.ea_s);
5311 }
5312 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5313 ds_cstr(&match), ds_cstr(&actions));
5314 }
5315
5316 sset_destroy(&all_ips);
5317
5318 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
5319 * LBed traffic respectively to be SNATed. In addition, there can be
5320 * a number of SNAT rules in the NAT table. */
5321 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
5322 (op->od->nbr->n_nat + 2));
5323 size_t n_snat_ips = 0;
5324
5325 ovs_be32 snat_ip;
5326 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
5327 &snat_ip);
5328 if (dnat_force_snat_ip) {
5329 snat_ips[n_snat_ips++] = snat_ip;
5330 }
5331
5332 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
5333 &snat_ip);
5334 if (lb_force_snat_ip) {
5335 snat_ips[n_snat_ips++] = snat_ip;
5336 }
5337
5338 for (int i = 0; i < op->od->nbr->n_nat; i++) {
5339 const struct nbrec_nat *nat;
5340
5341 nat = op->od->nbr->nat[i];
5342
5343 ovs_be32 ip;
5344 if (!ip_parse(nat->external_ip, &ip) || !ip) {
5345 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5346 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
5347 "for router %s", nat->external_ip, op->key);
5348 continue;
5349 }
5350
5351 if (!strcmp(nat->type, "snat")) {
5352 snat_ips[n_snat_ips++] = ip;
5353 continue;
5354 }
5355
5356 /* ARP handling for external IP addresses.
5357 *
5358 * DNAT IP addresses are external IP addresses that need ARP
5359 * handling. */
5360 ds_clear(&match);
5361 ds_put_format(&match,
5362 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
5363 op->json_key, IP_ARGS(ip));
5364
5365 ds_clear(&actions);
5366 ds_put_format(&actions,
5367 "eth.dst = eth.src; "
5368 "arp.op = 2; /* ARP reply */ "
5369 "arp.tha = arp.sha; ");
5370
5371 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
5372 struct eth_addr mac;
5373 if (nat->external_mac &&
5374 eth_addr_from_string(nat->external_mac, &mac)
5375 && nat->logical_port) {
5376 /* distributed NAT case, use nat->external_mac */
5377 ds_put_format(&actions,
5378 "eth.src = "ETH_ADDR_FMT"; "
5379 "arp.sha = "ETH_ADDR_FMT"; ",
5380 ETH_ADDR_ARGS(mac),
5381 ETH_ADDR_ARGS(mac));
5382 /* Traffic with eth.src = nat->external_mac should only be
5383 * sent from the chassis where nat->logical_port is
5384 * resident, so that upstream MAC learning points to the
5385 * correct chassis. Also need to avoid generation of
5386 * multiple ARP responses from different chassis. */
5387 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
5388 nat->logical_port);
5389 } else {
5390 ds_put_format(&actions,
5391 "eth.src = %s; "
5392 "arp.sha = %s; ",
5393 op->lrp_networks.ea_s,
5394 op->lrp_networks.ea_s);
5395 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5396 * should only be sent from the "redirect-chassis", so that
5397 * upstream MAC learning points to the "redirect-chassis".
5398 * Also need to avoid generation of multiple ARP responses
5399 * from different chassis. */
5400 if (op->od->l3redirect_port) {
5401 ds_put_format(&match, " && is_chassis_resident(%s)",
5402 op->od->l3redirect_port->json_key);
5403 }
5404 }
5405 } else {
5406 ds_put_format(&actions,
5407 "eth.src = %s; "
5408 "arp.sha = %s; ",
5409 op->lrp_networks.ea_s,
5410 op->lrp_networks.ea_s);
5411 }
5412 ds_put_format(&actions,
5413 "arp.tpa = arp.spa; "
5414 "arp.spa = "IP_FMT"; "
5415 "outport = %s; "
5416 "flags.loopback = 1; "
5417 "output;",
5418 IP_ARGS(ip),
5419 op->json_key);
5420 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5421 ds_cstr(&match), ds_cstr(&actions));
5422 }
5423
5424 if (!smap_get(&op->od->nbr->options, "chassis")
5425 && !op->od->l3dgw_port) {
5426 /* UDP/TCP port unreachable. */
5427 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5428 ds_clear(&match);
5429 ds_put_format(&match,
5430 "ip4 && ip4.dst == %s && !ip.later_frag && udp",
5431 op->lrp_networks.ipv4_addrs[i].addr_s);
5432 const char *action = "icmp4 {"
5433 "eth.dst <-> eth.src; "
5434 "ip4.dst <-> ip4.src; "
5435 "ip.ttl = 255; "
5436 "icmp4.type = 3; "
5437 "icmp4.code = 3; "
5438 "next; };";
5439 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5440 ds_cstr(&match), action);
5441
5442 ds_clear(&match);
5443 ds_put_format(&match,
5444 "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
5445 op->lrp_networks.ipv4_addrs[i].addr_s);
5446 action = "tcp_reset {"
5447 "eth.dst <-> eth.src; "
5448 "ip4.dst <-> ip4.src; "
5449 "next; };";
5450 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5451 ds_cstr(&match), action);
5452
5453 ds_clear(&match);
5454 ds_put_format(&match,
5455 "ip4 && ip4.dst == %s && !ip.later_frag",
5456 op->lrp_networks.ipv4_addrs[i].addr_s);
5457 action = "icmp4 {"
5458 "eth.dst <-> eth.src; "
5459 "ip4.dst <-> ip4.src; "
5460 "ip.ttl = 255; "
5461 "icmp4.type = 3; "
5462 "icmp4.code = 2; "
5463 "next; };";
5464 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5465 ds_cstr(&match), action);
5466 }
5467 }
5468
5469 ds_clear(&match);
5470 ds_put_cstr(&match, "ip4.dst == {");
5471 bool has_drop_ips = false;
5472 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5473 bool snat_ip_is_router_ip = false;
5474 for (int j = 0; j < n_snat_ips; j++) {
5475 /* Packets to SNAT IPs should not be dropped. */
5476 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
5477 snat_ip_is_router_ip = true;
5478 break;
5479 }
5480 }
5481 if (snat_ip_is_router_ip) {
5482 continue;
5483 }
5484 ds_put_format(&match, "%s, ",
5485 op->lrp_networks.ipv4_addrs[i].addr_s);
5486 has_drop_ips = true;
5487 }
5488 ds_chomp(&match, ' ');
5489 ds_chomp(&match, ',');
5490 ds_put_cstr(&match, "}");
5491
5492 if (has_drop_ips) {
5493 /* Drop IP traffic to this router. */
5494 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5495 ds_cstr(&match), "drop;");
5496 }
5497
5498 free(snat_ips);
5499 }
5500
5501 /* Logical router ingress table 1: IP Input for IPv6. */
5502 HMAP_FOR_EACH (op, key_node, ports) {
5503 if (!op->nbrp) {
5504 continue;
5505 }
5506
5507 if (op->derived) {
5508 /* No ingress packets are accepted on a chassisredirect
5509 * port, so no need to program flows for that port. */
5510 continue;
5511 }
5512
5513 if (op->lrp_networks.n_ipv6_addrs) {
5514 /* L3 admission control: drop packets that originate from an
5515 * IPv6 address owned by the router (priority 100). */
5516 ds_clear(&match);
5517 ds_put_cstr(&match, "ip6.src == ");
5518 op_put_v6_networks(&match, op);
5519 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
5520 ds_cstr(&match), "drop;");
5521
5522 /* ICMPv6 echo reply. These flows reply to echo requests
5523 * received for the router's IP address. */
5524 ds_clear(&match);
5525 ds_put_cstr(&match, "ip6.dst == ");
5526 op_put_v6_networks(&match, op);
5527 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
5528
5529 ds_clear(&actions);
5530 ds_put_cstr(&actions,
5531 "ip6.dst <-> ip6.src; "
5532 "ip.ttl = 255; "
5533 "icmp6.type = 129; "
5534 "flags.loopback = 1; "
5535 "next; ");
5536 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5537 ds_cstr(&match), ds_cstr(&actions));
5538
5539 /* Drop IPv6 traffic to this router. */
5540 ds_clear(&match);
5541 ds_put_cstr(&match, "ip6.dst == ");
5542 op_put_v6_networks(&match, op);
5543 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
5544 ds_cstr(&match), "drop;");
5545 }
5546
5547 /* ND reply. These flows reply to ND solicitations for the
5548 * router's own IP address. */
5549 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5550 ds_clear(&match);
5551 ds_put_format(&match,
5552 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
5553 "&& nd.target == %s",
5554 op->json_key,
5555 op->lrp_networks.ipv6_addrs[i].addr_s,
5556 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
5557 op->lrp_networks.ipv6_addrs[i].addr_s);
5558 if (op->od->l3dgw_port && op == op->od->l3dgw_port
5559 && op->od->l3redirect_port) {
5560 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5561 * should only be sent from the "redirect-chassis", so that
5562 * upstream MAC learning points to the "redirect-chassis".
5563 * Also need to avoid generation of multiple ND replies
5564 * from different chassis. */
5565 ds_put_format(&match, " && is_chassis_resident(%s)",
5566 op->od->l3redirect_port->json_key);
5567 }
5568
5569 ds_clear(&actions);
5570 ds_put_format(&actions,
5571 "put_nd(inport, ip6.src, nd.sll); "
5572 "nd_na_router { "
5573 "eth.src = %s; "
5574 "ip6.src = %s; "
5575 "nd.target = %s; "
5576 "nd.tll = %s; "
5577 "outport = inport; "
5578 "flags.loopback = 1; "
5579 "output; "
5580 "};",
5581 op->lrp_networks.ea_s,
5582 op->lrp_networks.ipv6_addrs[i].addr_s,
5583 op->lrp_networks.ipv6_addrs[i].addr_s,
5584 op->lrp_networks.ea_s);
5585 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
5586 ds_cstr(&match), ds_cstr(&actions));
5587 }
5588
5589 /* UDP/TCP port unreachable */
5590 if (!smap_get(&op->od->nbr->options, "chassis")
5591 && !op->od->l3dgw_port) {
5592 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5593 ds_clear(&match);
5594 ds_put_format(&match,
5595 "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
5596 op->lrp_networks.ipv6_addrs[i].addr_s);
5597 const char *action = "tcp_reset {"
5598 "eth.dst <-> eth.src; "
5599 "ip6.dst <-> ip6.src; "
5600 "next; };";
5601 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5602 ds_cstr(&match), action);
5603
5604 ds_clear(&match);
5605 ds_put_format(&match,
5606 "ip6 && ip6.dst == %s && !ip.later_frag && udp",
5607 op->lrp_networks.ipv6_addrs[i].addr_s);
5608 action = "icmp6 {"
5609 "eth.dst <-> eth.src; "
5610 "ip6.dst <-> ip6.src; "
5611 "ip.ttl = 255; "
5612 "icmp6.type = 1; "
5613 "icmp6.code = 4; "
5614 "next; };";
5615 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 80,
5616 ds_cstr(&match), action);
5617
5618 ds_clear(&match);
5619 ds_put_format(&match,
5620 "ip6 && ip6.dst == %s && !ip.later_frag",
5621 op->lrp_networks.ipv6_addrs[i].addr_s);
5622 action = "icmp6 {"
5623 "eth.dst <-> eth.src; "
5624 "ip6.dst <-> ip6.src; "
5625 "ip.ttl = 255; "
5626 "icmp6.type = 1; "
5627 "icmp6.code = 3; "
5628 "next; };";
5629 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 70,
5630 ds_cstr(&match), action);
5631 }
5632 }
5633
5634 /* ICMPv6 time exceeded */
5635 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5636 /* skip link-local address */
5637 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5638 continue;
5639 }
5640
5641 ds_clear(&match);
5642 ds_clear(&actions);
5643
5644 ds_put_format(&match,
5645 "inport == %s && ip6 && "
5646 "ip6.src == %s/%d && "
5647 "ip.ttl == {0, 1} && !ip.later_frag",
5648 op->json_key,
5649 op->lrp_networks.ipv6_addrs[i].network_s,
5650 op->lrp_networks.ipv6_addrs[i].plen);
5651 ds_put_format(&actions,
5652 "icmp6 {"
5653 "eth.dst <-> eth.src; "
5654 "ip6.dst = ip6.src; "
5655 "ip6.src = %s; "
5656 "ip.ttl = 255; "
5657 "icmp6.type = 3; /* Time exceeded */ "
5658 "icmp6.code = 0; /* TTL exceeded in transit */ "
5659 "next; };",
5660 op->lrp_networks.ipv6_addrs[i].addr_s);
5661 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
5662 ds_cstr(&match), ds_cstr(&actions));
5663 }
5664 }
5665
5666 /* NAT, Defrag and load balancing. */
5667 HMAP_FOR_EACH (od, key_node, datapaths) {
5668 if (!od->nbr) {
5669 continue;
5670 }
5671
5672 /* Packets are allowed by default. */
5673 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
5674 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
5675 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
5676 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
5677 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
5678 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
5679
5680 /* NAT rules are only valid on Gateway routers and routers with
5681 * l3dgw_port (router has a port with "redirect-chassis"
5682 * specified). */
5683 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
5684 continue;
5685 }
5686
5687 ovs_be32 snat_ip;
5688 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
5689 &snat_ip);
5690 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
5691 &snat_ip);
5692
5693 for (int i = 0; i < od->nbr->n_nat; i++) {
5694 const struct nbrec_nat *nat;
5695
5696 nat = od->nbr->nat[i];
5697
5698 ovs_be32 ip, mask;
5699
5700 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
5701 if (error || mask != OVS_BE32_MAX) {
5702 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
5703 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
5704 nat->external_ip);
5705 free(error);
5706 continue;
5707 }
5708
5709 /* Check the validity of nat->logical_ip. 'logical_ip' can
5710 * be a subnet when the type is "snat". */
5711 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
5712 if (!strcmp(nat->type, "snat")) {
5713 if (error) {
5714 static struct vlog_rate_limit rl =
5715 VLOG_RATE_LIMIT_INIT(5, 1);
5716 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
5717 "in router "UUID_FMT"",
5718 nat->logical_ip, UUID_ARGS(&od->key));
5719 free(error);
5720 continue;
5721 }
5722 } else {
5723 if (error || mask != OVS_BE32_MAX) {
5724 static struct vlog_rate_limit rl =
5725 VLOG_RATE_LIMIT_INIT(5, 1);
5726 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
5727 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
5728 free(error);
5729 continue;
5730 }
5731 }
5732
5733 /* For distributed router NAT, determine whether this NAT rule
5734 * satisfies the conditions for distributed NAT processing. */
5735 bool distributed = false;
5736 struct eth_addr mac;
5737 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
5738 nat->logical_port && nat->external_mac) {
5739 if (eth_addr_from_string(nat->external_mac, &mac)) {
5740 distributed = true;
5741 } else {
5742 static struct vlog_rate_limit rl =
5743 VLOG_RATE_LIMIT_INIT(5, 1);
5744 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
5745 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
5746 continue;
5747 }
5748 }
5749
5750 /* Ingress UNSNAT table: It is for already established connections'
5751 * reverse traffic. i.e., SNAT has already been done in egress
5752 * pipeline and now the packet has entered the ingress pipeline as
5753 * part of a reply. We undo the SNAT here.
5754 *
5755 * Undoing SNAT has to happen before DNAT processing. This is
5756 * because when the packet was DNATed in ingress pipeline, it did
5757 * not know about the possibility of eventual additional SNAT in
5758 * egress pipeline. */
5759 if (!strcmp(nat->type, "snat")
5760 || !strcmp(nat->type, "dnat_and_snat")) {
5761 if (!od->l3dgw_port) {
5762 /* Gateway router. */
5763 ds_clear(&match);
5764 ds_put_format(&match, "ip && ip4.dst == %s",
5765 nat->external_ip);
5766 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
5767 ds_cstr(&match), "ct_snat;");
5768 } else {
5769 /* Distributed router. */
5770
5771 /* Traffic received on l3dgw_port is subject to NAT. */
5772 ds_clear(&match);
5773 ds_put_format(&match, "ip && ip4.dst == %s"
5774 " && inport == %s",
5775 nat->external_ip,
5776 od->l3dgw_port->json_key);
5777 if (!distributed && od->l3redirect_port) {
5778 /* Flows for NAT rules that are centralized are only
5779 * programmed on the "redirect-chassis". */
5780 ds_put_format(&match, " && is_chassis_resident(%s)",
5781 od->l3redirect_port->json_key);
5782 }
5783 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5784 ds_cstr(&match), "ct_snat;");
5785
5786 /* Traffic received on other router ports must be
5787 * redirected to the central instance of the l3dgw_port
5788 * for NAT processing. */
5789 ds_clear(&match);
5790 ds_put_format(&match, "ip && ip4.dst == %s",
5791 nat->external_ip);
5792 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
5793 ds_cstr(&match),
5794 REGBIT_NAT_REDIRECT" = 1; next;");
5795 }
5796 }
5797
5798 /* Ingress DNAT table: Packets enter the pipeline with destination
5799 * IP address that needs to be DNATted from an external IP address
5800 * to a logical IP address. */
5801 if (!strcmp(nat->type, "dnat")
5802 || !strcmp(nat->type, "dnat_and_snat")) {
5803 if (!od->l3dgw_port) {
5804 /* Gateway router. */
5805 /* Packet when it goes from the initiator to destination.
5806 * We need to set flags.loopback because the router can
5807 * send the packet back through the same interface. */
5808 ds_clear(&match);
5809 ds_put_format(&match, "ip && ip4.dst == %s",
5810 nat->external_ip);
5811 ds_clear(&actions);
5812 if (dnat_force_snat_ip) {
5813 /* Indicate to the future tables that a DNAT has taken
5814 * place and a force SNAT needs to be done in the
5815 * Egress SNAT table. */
5816 ds_put_format(&actions,
5817 "flags.force_snat_for_dnat = 1; ");
5818 }
5819 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
5820 nat->logical_ip);
5821 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5822 ds_cstr(&match), ds_cstr(&actions));
5823 } else {
5824 /* Distributed router. */
5825
5826 /* Traffic received on l3dgw_port is subject to NAT. */
5827 ds_clear(&match);
5828 ds_put_format(&match, "ip && ip4.dst == %s"
5829 " && inport == %s",
5830 nat->external_ip,
5831 od->l3dgw_port->json_key);
5832 if (!distributed && od->l3redirect_port) {
5833 /* Flows for NAT rules that are centralized are only
5834 * programmed on the "redirect-chassis". */
5835 ds_put_format(&match, " && is_chassis_resident(%s)",
5836 od->l3redirect_port->json_key);
5837 }
5838 ds_clear(&actions);
5839 ds_put_format(&actions, "ct_dnat(%s);",
5840 nat->logical_ip);
5841 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5842 ds_cstr(&match), ds_cstr(&actions));
5843
5844 /* Traffic received on other router ports must be
5845 * redirected to the central instance of the l3dgw_port
5846 * for NAT processing. */
5847 ds_clear(&match);
5848 ds_put_format(&match, "ip && ip4.dst == %s",
5849 nat->external_ip);
5850 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5851 ds_cstr(&match),
5852 REGBIT_NAT_REDIRECT" = 1; next;");
5853 }
5854 }
5855
5856 /* Egress UNDNAT table: It is for already established connections'
5857 * reverse traffic. i.e., DNAT has already been done in ingress
5858 * pipeline and now the packet has entered the egress pipeline as
5859 * part of a reply. We undo the DNAT here.
5860 *
5861 * Note that this only applies for NAT on a distributed router.
5862 * Undo DNAT on a gateway router is done in the ingress DNAT
5863 * pipeline stage. */
5864 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
5865 || !strcmp(nat->type, "dnat_and_snat"))) {
5866 ds_clear(&match);
5867 ds_put_format(&match, "ip && ip4.src == %s"
5868 " && outport == %s",
5869 nat->logical_ip,
5870 od->l3dgw_port->json_key);
5871 if (!distributed && od->l3redirect_port) {
5872 /* Flows for NAT rules that are centralized are only
5873 * programmed on the "redirect-chassis". */
5874 ds_put_format(&match, " && is_chassis_resident(%s)",
5875 od->l3redirect_port->json_key);
5876 }
5877 ds_clear(&actions);
5878 if (distributed) {
5879 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5880 ETH_ADDR_ARGS(mac));
5881 }
5882 ds_put_format(&actions, "ct_dnat;");
5883 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
5884 ds_cstr(&match), ds_cstr(&actions));
5885 }
5886
5887 /* Egress SNAT table: Packets enter the egress pipeline with
5888 * source ip address that needs to be SNATted to an external ip
5889 * address. */
5890 if (!strcmp(nat->type, "snat")
5891 || !strcmp(nat->type, "dnat_and_snat")) {
5892 if (!od->l3dgw_port) {
5893 /* Gateway router. */
5894 ds_clear(&match);
5895 ds_put_format(&match, "ip && ip4.src == %s",
5896 nat->logical_ip);
5897 ds_clear(&actions);
5898 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5899
5900 /* The priority here is calculated such that the
5901 * nat->logical_ip with the longest mask gets a higher
5902 * priority. */
5903 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5904 count_1bits(ntohl(mask)) + 1,
5905 ds_cstr(&match), ds_cstr(&actions));
5906 } else {
5907 /* Distributed router. */
5908 ds_clear(&match);
5909 ds_put_format(&match, "ip && ip4.src == %s"
5910 " && outport == %s",
5911 nat->logical_ip,
5912 od->l3dgw_port->json_key);
5913 if (!distributed && od->l3redirect_port) {
5914 /* Flows for NAT rules that are centralized are only
5915 * programmed on the "redirect-chassis". */
5916 ds_put_format(&match, " && is_chassis_resident(%s)",
5917 od->l3redirect_port->json_key);
5918 }
5919 ds_clear(&actions);
5920 if (distributed) {
5921 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5922 ETH_ADDR_ARGS(mac));
5923 }
5924 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5925
5926 /* The priority here is calculated such that the
5927 * nat->logical_ip with the longest mask gets a higher
5928 * priority. */
5929 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5930 count_1bits(ntohl(mask)) + 1,
5931 ds_cstr(&match), ds_cstr(&actions));
5932 }
5933 }
5934
5935 /* Logical router ingress table 0:
5936 * For NAT on a distributed router, add rules allowing
5937 * ingress traffic with eth.dst matching nat->external_mac
5938 * on the l3dgw_port instance where nat->logical_port is
5939 * resident. */
5940 if (distributed) {
5941 ds_clear(&match);
5942 ds_put_format(&match,
5943 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
5944 " && is_chassis_resident(\"%s\")",
5945 ETH_ADDR_ARGS(mac),
5946 od->l3dgw_port->json_key,
5947 nat->logical_port);
5948 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
5949 ds_cstr(&match), "next;");
5950 }
5951
5952 /* Ingress Gateway Redirect Table: For NAT on a distributed
5953 * router, add flows that are specific to a NAT rule. These
5954 * flows indicate the presence of an applicable NAT rule that
5955 * can be applied in a distributed manner. */
5956 if (distributed) {
5957 ds_clear(&match);
5958 ds_put_format(&match, "ip4.src == %s && outport == %s",
5959 nat->logical_ip,
5960 od->l3dgw_port->json_key);
5961 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
5962 ds_cstr(&match), "next;");
5963 }
5964
5965 /* Egress Loopback table: For NAT on a distributed router.
5966 * If packets in the egress pipeline on the distributed
5967 * gateway port have ip.dst matching a NAT external IP, then
5968 * loop a clone of the packet back to the beginning of the
5969 * ingress pipeline with inport = outport. */
5970 if (od->l3dgw_port) {
5971 /* Distributed router. */
5972 ds_clear(&match);
5973 ds_put_format(&match, "ip4.dst == %s && outport == %s",
5974 nat->external_ip,
5975 od->l3dgw_port->json_key);
5976 ds_clear(&actions);
5977 ds_put_format(&actions,
5978 "clone { ct_clear; "
5979 "inport = outport; outport = \"\"; "
5980 "flags = 0; flags.loopback = 1; ");
5981 for (int j = 0; j < MFF_N_LOG_REGS; j++) {
5982 ds_put_format(&actions, "reg%d = 0; ", j);
5983 }
5984 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
5985 "next(pipeline=ingress, table=0); };");
5986 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
5987 ds_cstr(&match), ds_cstr(&actions));
5988 }
5989 }
5990
5991 /* Handle force SNAT options set in the gateway router. */
5992 if (dnat_force_snat_ip && !od->l3dgw_port) {
5993 /* If a packet with destination IP address as that of the
5994 * gateway router (as set in options:dnat_force_snat_ip) is seen,
5995 * UNSNAT it. */
5996 ds_clear(&match);
5997 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
5998 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
5999 ds_cstr(&match), "ct_snat;");
6000
6001 /* Higher priority rules to force SNAT with the IP addresses
6002 * configured in the Gateway router. This only takes effect
6003 * when the packet has already been DNATed once. */
6004 ds_clear(&match);
6005 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
6006 ds_clear(&actions);
6007 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
6008 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
6009 ds_cstr(&match), ds_cstr(&actions));
6010 }
6011 if (lb_force_snat_ip && !od->l3dgw_port) {
6012 /* If a packet with destination IP address as that of the
6013 * gateway router (as set in options:lb_force_snat_ip) is seen,
6014 * UNSNAT it. */
6015 ds_clear(&match);
6016 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
6017 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
6018 ds_cstr(&match), "ct_snat;");
6019
6020 /* Load balanced traffic will have flags.force_snat_for_lb set.
6021 * Force SNAT it. */
6022 ds_clear(&match);
6023 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
6024 ds_clear(&actions);
6025 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
6026 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
6027 ds_cstr(&match), ds_cstr(&actions));
6028 }
6029
6030 if (!od->l3dgw_port) {
6031 /* For gateway router, re-circulate every packet through
6032 * the DNAT zone. This helps with the following.
6033 *
6034 * Any packet that needs to be unDNATed in the reverse
6035 * direction gets unDNATed. Ideally this could be done in
6036 * the egress pipeline. But since the gateway router
6037 * does not have any feature that depends on the source
6038 * ip address being external IP address for IP routing,
6039 * we can do it here, saving a future re-circulation. */
6040 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
6041 "ip", "flags.loopback = 1; ct_dnat;");
6042 } else {
6043 /* For NAT on a distributed router, add flows to Ingress
6044 * IP Routing table, Ingress ARP Resolution table, and
6045 * Ingress Gateway Redirect Table that are not specific to a
6046 * NAT rule. */
6047
6048 /* The highest priority IN_IP_ROUTING rule matches packets
6049 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
6050 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
6051 * will take care of setting the outport. */
6052 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
6053 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
6054
6055 /* The highest priority IN_ARP_RESOLVE rule matches packets
6056 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
6057 * then sets eth.dst to the distributed gateway port's
6058 * ethernet address. */
6059 ds_clear(&actions);
6060 ds_put_format(&actions, "eth.dst = %s; next;",
6061 od->l3dgw_port->lrp_networks.ea_s);
6062 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
6063 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
6064
6065 /* The highest priority IN_GW_REDIRECT rule redirects packets
6066 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
6067 * the central instance of the l3dgw_port for NAT processing. */
6068 ds_clear(&actions);
6069 ds_put_format(&actions, "outport = %s; next;",
6070 od->l3redirect_port->json_key);
6071 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
6072 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
6073 }
6074
6075 /* Load balancing and packet defrag are only valid on
6076 * Gateway routers or router with gateway port. */
6077 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
6078 continue;
6079 }
6080
6081 /* A set to hold all ips that need defragmentation and tracking. */
6082 struct sset all_ips = SSET_INITIALIZER(&all_ips);
6083
6084 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
6085 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
6086 struct smap *vips = &lb->vips;
6087 struct smap_node *node;
6088
6089 SMAP_FOR_EACH (node, vips) {
6090 uint16_t port = 0;
6091 int addr_family;
6092
6093 /* node->key contains IP:port or just IP. */
6094 char *ip_address = NULL;
6095 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
6096 &addr_family);
6097 if (!ip_address) {
6098 continue;
6099 }
6100
6101 if (!sset_contains(&all_ips, ip_address)) {
6102 sset_add(&all_ips, ip_address);
6103 /* If there are any load balancing rules, we should send
6104 * the packet to conntrack for defragmentation and
6105 * tracking. This helps with two things.
6106 *
6107 * 1. With tracking, we can send only new connections to
6108 * pick a DNAT ip address from a group.
6109 * 2. If there are L4 ports in load balancing rules, we
6110 * need the defragmentation to match on L4 ports. */
6111 ds_clear(&match);
6112 if (addr_family == AF_INET) {
6113 ds_put_format(&match, "ip && ip4.dst == %s",
6114 ip_address);
6115 } else {
6116 ds_put_format(&match, "ip && ip6.dst == %s",
6117 ip_address);
6118 }
6119 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
6120 100, ds_cstr(&match), "ct_next;");
6121 }
6122
6123 /* Higher priority rules are added for load-balancing in DNAT
6124 * table. For every match (on a VIP[:port]), we add two flows
6125 * via add_router_lb_flow(). One flow is for specific matching
6126 * on ct.new with an action of "ct_lb($targets);". The other
6127 * flow is for ct.est with an action of "ct_dnat;". */
6128 ds_clear(&actions);
6129 ds_put_format(&actions, "ct_lb(%s);", node->value);
6130
6131 ds_clear(&match);
6132 if (addr_family == AF_INET) {
6133 ds_put_format(&match, "ip && ip4.dst == %s",
6134 ip_address);
6135 } else {
6136 ds_put_format(&match, "ip && ip6.dst == %s",
6137 ip_address);
6138 }
6139 free(ip_address);
6140
6141 int prio = 110;
6142 bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ?
6143 true : false;
6144 if (port) {
6145 if (is_udp) {
6146 ds_put_format(&match, " && udp && udp.dst == %d",
6147 port);
6148 } else {
6149 ds_put_format(&match, " && tcp && tcp.dst == %d",
6150 port);
6151 }
6152 prio = 120;
6153 }
6154
6155 if (od->l3redirect_port) {
6156 ds_put_format(&match, " && is_chassis_resident(%s)",
6157 od->l3redirect_port->json_key);
6158 }
6159 add_router_lb_flow(lflows, od, &match, &actions, prio,
6160 lb_force_snat_ip, node->value, is_udp,
6161 addr_family);
6162 }
6163 }
6164 sset_destroy(&all_ips);
6165 }
6166
6167 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
6168 * response. */
6169 HMAP_FOR_EACH (op, key_node, ports) {
6170 if (!op->nbrp || op->nbrp->peer || !op->peer) {
6171 continue;
6172 }
6173
6174 if (!op->lrp_networks.n_ipv6_addrs) {
6175 continue;
6176 }
6177
6178 const char *address_mode = smap_get(
6179 &op->nbrp->ipv6_ra_configs, "address_mode");
6180
6181 if (!address_mode) {
6182 continue;
6183 }
6184 if (strcmp(address_mode, "slaac") &&
6185 strcmp(address_mode, "dhcpv6_stateful") &&
6186 strcmp(address_mode, "dhcpv6_stateless")) {
6187 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
6188 VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
6189 address_mode);
6190 continue;
6191 }
6192
6193 if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
6194 false)) {
6195 copy_ra_to_sb(op, address_mode);
6196 }
6197
6198 ds_clear(&match);
6199 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
6200 op->json_key);
6201 ds_clear(&actions);
6202
6203 const char *mtu_s = smap_get(
6204 &op->nbrp->ipv6_ra_configs, "mtu");
6205
6206 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
6207 uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
6208
6209 ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
6210 "addr_mode = \"%s\", slla = %s",
6211 address_mode, op->lrp_networks.ea_s);
6212 if (mtu > 0) {
6213 ds_put_format(&actions, ", mtu = %u", mtu);
6214 }
6215
6216 bool add_rs_response_flow = false;
6217
6218 for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6219 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
6220 continue;
6221 }
6222
6223 /* Add the prefix option if the address mode is slaac or
6224 * dhcpv6_stateless. */
6225 if (strcmp(address_mode, "dhcpv6_stateful")) {
6226 ds_put_format(&actions, ", prefix = %s/%u",
6227 op->lrp_networks.ipv6_addrs[i].network_s,
6228 op->lrp_networks.ipv6_addrs[i].plen);
6229 }
6230 add_rs_response_flow = true;
6231 }
6232
6233 if (add_rs_response_flow) {
6234 ds_put_cstr(&actions, "); next;");
6235 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50,
6236 ds_cstr(&match), ds_cstr(&actions));
6237 ds_clear(&actions);
6238 ds_clear(&match);
6239 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && "
6240 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
6241
6242 char ip6_str[INET6_ADDRSTRLEN + 1];
6243 struct in6_addr lla;
6244 in6_generate_lla(op->lrp_networks.ea, &lla);
6245 memset(ip6_str, 0, sizeof(ip6_str));
6246 ipv6_string_mapped(ip6_str, &lla);
6247 ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; "
6248 "ip6.dst = ip6.src; ip6.src = %s; "
6249 "outport = inport; flags.loopback = 1; "
6250 "output;",
6251 op->lrp_networks.ea_s, ip6_str);
6252 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50,
6253 ds_cstr(&match), ds_cstr(&actions));
6254 }
6255 }
6256
6257 /* Logical router ingress table 5, 6: RS responder, by default goto next.
6258 * (priority 0)*/
6259 HMAP_FOR_EACH (od, key_node, datapaths) {
6260 if (!od->nbr) {
6261 continue;
6262 }
6263
6264 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
6265 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
6266 }
6267
6268 /* Logical router ingress table 7: IP Routing.
6269 *
6270 * A packet that arrives at this table is an IP packet that should be
6271 * routed to the address in 'ip[46].dst'. This table sets outport to
6272 * the correct output port, eth.src to the output port's MAC
6273 * address, and '[xx]reg0' to the next-hop IP address (leaving
6274 * 'ip[46].dst', the packet’s final destination, unchanged), and
6275 * advances to the next table for ARP/ND resolution. */
6276 HMAP_FOR_EACH (op, key_node, ports) {
6277 if (!op->nbrp) {
6278 continue;
6279 }
6280
6281 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
6282 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
6283 op->lrp_networks.ipv4_addrs[i].network_s,
6284 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
6285 }
6286
6287 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
6288 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
6289 op->lrp_networks.ipv6_addrs[i].network_s,
6290 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6291 }
6292 }
6293
6294 /* Convert the static routes to flows. */
6295 HMAP_FOR_EACH (od, key_node, datapaths) {
6296 if (!od->nbr) {
6297 continue;
6298 }
6299
6300 for (int i = 0; i < od->nbr->n_static_routes; i++) {
6301 const struct nbrec_logical_router_static_route *route;
6302
6303 route = od->nbr->static_routes[i];
6304 build_static_route_flow(lflows, od, ports, route);
6305 }
6306 }
6307
6308 /* XXX destination unreachable */
6309
6310 /* Logical router ingress table 8: ARP Resolution.
6311 *
6312 * Any packet that reaches this table is an IP packet whose next-hop IP
6313 * address is in reg0. (ip4.dst is the final destination.) This table
6314 * resolves the IP address in reg0 into an output port in outport and an
6315 * Ethernet address in eth.dst. */
6316 HMAP_FOR_EACH (op, key_node, ports) {
6317 if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
6318 continue;
6319 }
6320
6321 if (op->nbrp) {
6322 /* This is a logical router port. If next-hop IP address in
6323 * '[xx]reg0' matches IP address of this router port, then
6324 * the packet is intended to eventually be sent to this
6325 * logical port. Set the destination mac address using this
6326 * port's mac address.
6327 *
6328 * The packet is still in peer's logical pipeline. So the match
6329 * should be on peer's outport. */
6330 if (op->peer && op->nbrp->peer) {
6331 if (op->lrp_networks.n_ipv4_addrs) {
6332 ds_clear(&match);
6333 ds_put_format(&match, "outport == %s && reg0 == ",
6334 op->peer->json_key);
6335 op_put_v4_networks(&match, op, false);
6336
6337 ds_clear(&actions);
6338 ds_put_format(&actions, "eth.dst = %s; next;",
6339 op->lrp_networks.ea_s);
6340 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6341 100, ds_cstr(&match), ds_cstr(&actions));
6342 }
6343
6344 if (op->lrp_networks.n_ipv6_addrs) {
6345 ds_clear(&match);
6346 ds_put_format(&match, "outport == %s && xxreg0 == ",
6347 op->peer->json_key);
6348 op_put_v6_networks(&match, op);
6349
6350 ds_clear(&actions);
6351 ds_put_format(&actions, "eth.dst = %s; next;",
6352 op->lrp_networks.ea_s);
6353 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
6354 100, ds_cstr(&match), ds_cstr(&actions));
6355 }
6356 }
6357 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
6358 /* This is a logical switch port that backs a VM or a container.
6359 * Extract its addresses. For each of the addresses, go through all
6360 * the router ports attached to the switch (to which this port
6361 * connects) and if the address in question is reachable from the
6362 * router port, add an ARP/ND entry in that router's pipeline. */
6363
6364 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
6365 const char *ea_s = op->lsp_addrs[i].ea_s;
6366 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
6367 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
6368 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6369 /* Get the Logical_Router_Port that the
6370 * Logical_Switch_Port is connected to, as
6371 * 'peer'. */
6372 const char *peer_name = smap_get(
6373 &op->od->router_ports[k]->nbsp->options,
6374 "router-port");
6375 if (!peer_name) {
6376 continue;
6377 }
6378
6379 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6380 if (!peer || !peer->nbrp) {
6381 continue;
6382 }
6383
6384 if (!find_lrp_member_ip(peer, ip_s)) {
6385 continue;
6386 }
6387
6388 ds_clear(&match);
6389 ds_put_format(&match, "outport == %s && reg0 == %s",
6390 peer->json_key, ip_s);
6391
6392 ds_clear(&actions);
6393 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6394 ovn_lflow_add(lflows, peer->od,
6395 S_ROUTER_IN_ARP_RESOLVE, 100,
6396 ds_cstr(&match), ds_cstr(&actions));
6397 }
6398 }
6399
6400 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
6401 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
6402 for (size_t k = 0; k < op->od->n_router_ports; k++) {
6403 /* Get the Logical_Router_Port that the
6404 * Logical_Switch_Port is connected to, as
6405 * 'peer'. */
6406 const char *peer_name = smap_get(
6407 &op->od->router_ports[k]->nbsp->options,
6408 "router-port");
6409 if (!peer_name) {
6410 continue;
6411 }
6412
6413 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6414 if (!peer || !peer->nbrp) {
6415 continue;
6416 }
6417
6418 if (!find_lrp_member_ip(peer, ip_s)) {
6419 continue;
6420 }
6421
6422 ds_clear(&match);
6423 ds_put_format(&match, "outport == %s && xxreg0 == %s",
6424 peer->json_key, ip_s);
6425
6426 ds_clear(&actions);
6427 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
6428 ovn_lflow_add(lflows, peer->od,
6429 S_ROUTER_IN_ARP_RESOLVE, 100,
6430 ds_cstr(&match), ds_cstr(&actions));
6431 }
6432 }
6433 }
6434 } else if (!strcmp(op->nbsp->type, "router")) {
6435 /* This is a logical switch port that connects to a router. */
6436
6437 /* The peer of this switch port is the router port for which
6438 * we need to add logical flows such that it can resolve
6439 * ARP entries for all the other router ports connected to
6440 * the switch in question. */
6441
6442 const char *peer_name = smap_get(&op->nbsp->options,
6443 "router-port");
6444 if (!peer_name) {
6445 continue;
6446 }
6447
6448 struct ovn_port *peer = ovn_port_find(ports, peer_name);
6449 if (!peer || !peer->nbrp) {
6450 continue;
6451 }
6452
6453 for (size_t i = 0; i < op->od->n_router_ports; i++) {
6454 const char *router_port_name = smap_get(
6455 &op->od->router_ports[i]->nbsp->options,
6456 "router-port");
6457 struct ovn_port *router_port = ovn_port_find(ports,
6458 router_port_name);
6459 if (!router_port || !router_port->nbrp) {
6460 continue;
6461 }
6462
6463 /* Skip the router port under consideration. */
6464 if (router_port == peer) {
6465 continue;
6466 }
6467
6468 if (router_port->lrp_networks.n_ipv4_addrs) {
6469 ds_clear(&match);
6470 ds_put_format(&match, "outport == %s && reg0 == ",
6471 peer->json_key);
6472 op_put_v4_networks(&match, router_port, false);
6473
6474 ds_clear(&actions);
6475 ds_put_format(&actions, "eth.dst = %s; next;",
6476 router_port->lrp_networks.ea_s);
6477 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6478 100, ds_cstr(&match), ds_cstr(&actions));
6479 }
6480
6481 if (router_port->lrp_networks.n_ipv6_addrs) {
6482 ds_clear(&match);
6483 ds_put_format(&match, "outport == %s && xxreg0 == ",
6484 peer->json_key);
6485 op_put_v6_networks(&match, router_port);
6486
6487 ds_clear(&actions);
6488 ds_put_format(&actions, "eth.dst = %s; next;",
6489 router_port->lrp_networks.ea_s);
6490 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
6491 100, ds_cstr(&match), ds_cstr(&actions));
6492 }
6493 }
6494 }
6495 }
6496
6497 HMAP_FOR_EACH (od, key_node, datapaths) {
6498 if (!od->nbr) {
6499 continue;
6500 }
6501
6502 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
6503 "get_arp(outport, reg0); next;");
6504
6505 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
6506 "get_nd(outport, xxreg0); next;");
6507 }
6508
6509 /* Logical router ingress table 9: Gateway redirect.
6510 *
6511 * For traffic with outport equal to the l3dgw_port
6512 * on a distributed router, this table redirects a subset
6513 * of the traffic to the l3redirect_port which represents
6514 * the central instance of the l3dgw_port.
6515 */
6516 HMAP_FOR_EACH (od, key_node, datapaths) {
6517 if (!od->nbr) {
6518 continue;
6519 }
6520 if (od->l3dgw_port && od->l3redirect_port) {
6521 /* For traffic with outport == l3dgw_port, if the
6522 * packet did not match any higher priority redirect
6523 * rule, then the traffic is redirected to the central
6524 * instance of the l3dgw_port. */
6525 ds_clear(&match);
6526 ds_put_format(&match, "outport == %s",
6527 od->l3dgw_port->json_key);
6528 ds_clear(&actions);
6529 ds_put_format(&actions, "outport = %s; next;",
6530 od->l3redirect_port->json_key);
6531 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
6532 ds_cstr(&match), ds_cstr(&actions));
6533
6534 /* If the Ethernet destination has not been resolved,
6535 * redirect to the central instance of the l3dgw_port.
6536 * Such traffic will be replaced by an ARP request or ND
6537 * Neighbor Solicitation in the ARP request ingress
6538 * table, before being redirected to the central instance.
6539 */
6540 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
6541 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
6542 ds_cstr(&match), ds_cstr(&actions));
6543 }
6544
6545 /* Packets are allowed by default. */
6546 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
6547 }
6548
6549 /* Logical router ingress table 10: ARP request.
6550 *
6551 * In the common case where the Ethernet destination has been resolved,
6552 * this table outputs the packet (priority 0). Otherwise, it composes
6553 * and sends an ARP request or IPv6 Neighbor Solicitation (priority 100). */
6554 HMAP_FOR_EACH (od, key_node, datapaths) {
6555 if (!od->nbr) {
6556 continue;
6557 }
6558
6559 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6560 "eth.dst == 00:00:00:00:00:00",
6561 "arp { "
6562 "eth.dst = ff:ff:ff:ff:ff:ff; "
6563 "arp.spa = reg1; "
6564 "arp.tpa = reg0; "
6565 "arp.op = 1; " /* ARP request */
6566 "output; "
6567 "};");
6568 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
6569 "eth.dst == 00:00:00:00:00:00",
6570 "nd_ns { "
6571 "nd.target = xxreg0; "
6572 "output; "
6573 "};");
6574 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
6575 }
6576
6577 /* Logical router egress table 1: Delivery (priority 100).
6578 *
6579 * Priority 100 rules deliver packets to enabled logical ports. */
6580 HMAP_FOR_EACH (op, key_node, ports) {
6581 if (!op->nbrp) {
6582 continue;
6583 }
6584
6585 if (!lrport_is_enabled(op->nbrp)) {
6586 /* Drop packets to disabled logical ports (since logical flow
6587 * tables are default-drop). */
6588 continue;
6589 }
6590
6591 if (op->derived) {
6592 /* No egress packets should be processed in the context of
6593 * a chassisredirect port. The chassisredirect port should
6594 * be replaced by the l3dgw port in the local output
6595 * pipeline stage before egress processing. */
6596 continue;
6597 }
6598
6599 ds_clear(&match);
6600 ds_put_format(&match, "outport == %s", op->json_key);
6601 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
6602 ds_cstr(&match), "output;");
6603 }
6604
6605 ds_destroy(&match);
6606 ds_destroy(&actions);
6607 }
6608
6609 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
6610 * constructing their contents based on the OVN_NB database. */
6611 static void
6612 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
6613 struct hmap *ports, struct hmap *port_groups)
6614 {
6615 struct hmap lflows = HMAP_INITIALIZER(&lflows);
6616 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
6617
6618 build_lswitch_flows(datapaths, ports, port_groups, &lflows, &mcgroups);
6619 build_lrouter_flows(datapaths, ports, &lflows);
6620
6621 /* Push changes to the Logical_Flow table to database. */
6622 const struct sbrec_logical_flow *sbflow, *next_sbflow;
6623 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
6624 struct ovn_datapath *od
6625 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
6626 if (!od) {
6627 sbrec_logical_flow_delete(sbflow);
6628 continue;
6629 }
6630
6631 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
6632 enum ovn_pipeline pipeline
6633 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
6634 struct ovn_lflow *lflow = ovn_lflow_find(
6635 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
6636 sbflow->priority, sbflow->match, sbflow->actions, sbflow->hash);
6637 if (lflow) {
6638 ovn_lflow_destroy(&lflows, lflow);
6639 } else {
6640 sbrec_logical_flow_delete(sbflow);
6641 }
6642 }
6643 struct ovn_lflow *lflow, *next_lflow;
6644 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
6645 const char *pipeline = ovn_stage_get_pipeline_name(lflow->stage);
6646 uint8_t table = ovn_stage_get_table(lflow->stage);
6647
6648 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
6649 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
6650 sbrec_logical_flow_set_pipeline(sbflow, pipeline);
6651 sbrec_logical_flow_set_table_id(sbflow, table);
6652 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
6653 sbrec_logical_flow_set_match(sbflow, lflow->match);
6654 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
6655
6656 /* Trim the source locator lflow->where, which looks something like
6657 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
6658 * last slash, e.g. "ovn-northd.c:1234". */
6659 const char *slash = strrchr(lflow->where, '/');
6660 #if _WIN32
6661 const char *backslash = strrchr(lflow->where, '\\');
6662 if (!slash || backslash > slash) {
6663 slash = backslash;
6664 }
6665 #endif
6666 const char *where = slash ? slash + 1 : lflow->where;
6667
6668 struct smap ids = SMAP_INITIALIZER(&ids);
6669 smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
6670 smap_add(&ids, "source", where);
6671 if (lflow->stage_hint) {
6672 smap_add(&ids, "stage-hint", lflow->stage_hint);
6673 }
6674 sbrec_logical_flow_set_external_ids(sbflow, &ids);
6675 smap_destroy(&ids);
6676
6677 ovn_lflow_destroy(&lflows, lflow);
6678 }
6679 hmap_destroy(&lflows);
6680
6681 /* Push changes to the Multicast_Group table to database. */
6682 const struct sbrec_multicast_group *sbmc, *next_sbmc;
6683 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
6684 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
6685 sbmc->datapath);
6686 if (!od) {
6687 sbrec_multicast_group_delete(sbmc);
6688 continue;
6689 }
6690
6691 struct multicast_group group = { .name = sbmc->name,
6692 .key = sbmc->tunnel_key };
6693 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
6694 if (mc) {
6695 ovn_multicast_update_sbrec(mc, sbmc);
6696 ovn_multicast_destroy(&mcgroups, mc);
6697 } else {
6698 sbrec_multicast_group_delete(sbmc);
6699 }
6700 }
6701 struct ovn_multicast *mc, *next_mc;
6702 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
6703 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
6704 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
6705 sbrec_multicast_group_set_name(sbmc, mc->group->name);
6706 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
6707 ovn_multicast_update_sbrec(mc, sbmc);
6708 ovn_multicast_destroy(&mcgroups, mc);
6709 }
6710 hmap_destroy(&mcgroups);
6711 }
6712
6713 static void
6714 sync_address_set(struct northd_context *ctx, const char *name,
6715 const char **addrs, size_t n_addrs,
6716 struct shash *sb_address_sets)
6717 {
6718 const struct sbrec_address_set *sb_address_set;
6719 sb_address_set = shash_find_and_delete(sb_address_sets,
6720 name);
6721 if (!sb_address_set) {
6722 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
6723 sbrec_address_set_set_name(sb_address_set, name);
6724 }
6725
6726 sbrec_address_set_set_addresses(sb_address_set,
6727 addrs, n_addrs);
6728 }
6729
6730 /* Go through 'addresses' and add found IPv4 addresses to 'ipv4_addrs' and IPv6
6731 * addresses to 'ipv6_addrs'.
6732 */
6733 static void
6734 split_addresses(const char *addresses, struct svec *ipv4_addrs,
6735 struct svec *ipv6_addrs)
6736 {
6737 struct lport_addresses laddrs;
6738 extract_lsp_addresses(addresses, &laddrs);
6739 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
6740 svec_add(ipv4_addrs, laddrs.ipv4_addrs[k].addr_s);
6741 }
6742 for (size_t k = 0; k < laddrs.n_ipv6_addrs; k++) {
6743 svec_add(ipv6_addrs, laddrs.ipv6_addrs[k].addr_s);
6744 }
6745 destroy_lport_addresses(&laddrs);
6746 }
6747
6748 /* OVN_Southbound Address_Set table contains same records as in north
6749 * bound, plus the records generated from Port_Group table in north bound.
6750 *
6751 * There are 2 records generated from each port group, one for IPv4, and
6752 * one for IPv6, named in the format: <port group name>_ip4 and
6753 * <port group name>_ip6 respectively. MAC addresses are ignored.
6754 *
6755 * We always update OVN_Southbound to match the Address_Set and Port_Group
6756 * in OVN_Northbound, so that the address sets used in Logical_Flows in
6757 * OVN_Southbound is checked against the proper set.*/
6758 static void
6759 sync_address_sets(struct northd_context *ctx)
6760 {
6761 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
6762
6763 const struct sbrec_address_set *sb_address_set;
6764 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
6765 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
6766 }
6767
6768 /* sync port group generated address sets first */
6769 const struct nbrec_port_group *nb_port_group;
6770 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6771 struct svec ipv4_addrs = SVEC_EMPTY_INITIALIZER;
6772 struct svec ipv6_addrs = SVEC_EMPTY_INITIALIZER;
6773 for (size_t i = 0; i < nb_port_group->n_ports; i++) {
6774 for (size_t j = 0; j < nb_port_group->ports[i]->n_addresses; j++) {
6775 const char *addrs = nb_port_group->ports[i]->addresses[j];
6776 if (!is_dynamic_lsp_address(addrs)) {
6777 split_addresses(addrs, &ipv4_addrs, &ipv6_addrs);
6778 }
6779 }
6780 if (nb_port_group->ports[i]->dynamic_addresses) {
6781 split_addresses(nb_port_group->ports[i]->dynamic_addresses,
6782 &ipv4_addrs, &ipv6_addrs);
6783 }
6784 }
6785 char *ipv4_addrs_name = xasprintf("%s_ip4", nb_port_group->name);
6786 char *ipv6_addrs_name = xasprintf("%s_ip6", nb_port_group->name);
6787 sync_address_set(ctx, ipv4_addrs_name,
6788 /* "char **" is not compatible with "const char **" */
6789 (const char **)ipv4_addrs.names,
6790 ipv4_addrs.n, &sb_address_sets);
6791 sync_address_set(ctx, ipv6_addrs_name,
6792 /* "char **" is not compatible with "const char **" */
6793 (const char **)ipv6_addrs.names,
6794 ipv6_addrs.n, &sb_address_sets);
6795 free(ipv4_addrs_name);
6796 free(ipv6_addrs_name);
6797 svec_destroy(&ipv4_addrs);
6798 svec_destroy(&ipv6_addrs);
6799 }
6800
6801 /* sync user defined address sets, which may overwrite port group
6802 * generated address sets if same name is used */
6803 const struct nbrec_address_set *nb_address_set;
6804 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
6805 sync_address_set(ctx, nb_address_set->name,
6806 /* "char **" is not compatible with "const char **" */
6807 (const char **)nb_address_set->addresses,
6808 nb_address_set->n_addresses, &sb_address_sets);
6809 }
6810
6811 struct shash_node *node, *next;
6812 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
6813 sbrec_address_set_delete(node->data);
6814 shash_delete(&sb_address_sets, node);
6815 }
6816 shash_destroy(&sb_address_sets);
6817 }
6818
6819 /* Each port group in Port_Group table in OVN_Northbound has a corresponding
6820 * entry in Port_Group table in OVN_Southbound. In OVN_Northbound the entries
6821 * contains lport uuids, while in OVN_Southbound we store the lport names.
6822 */
6823 static void
6824 sync_port_groups(struct northd_context *ctx)
6825 {
6826 struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups);
6827
6828 const struct sbrec_port_group *sb_port_group;
6829 SBREC_PORT_GROUP_FOR_EACH (sb_port_group, ctx->ovnsb_idl) {
6830 shash_add(&sb_port_groups, sb_port_group->name, sb_port_group);
6831 }
6832
6833 const struct nbrec_port_group *nb_port_group;
6834 NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
6835 sb_port_group = shash_find_and_delete(&sb_port_groups,
6836 nb_port_group->name);
6837 if (!sb_port_group) {
6838 sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn);
6839 sbrec_port_group_set_name(sb_port_group, nb_port_group->name);
6840 }
6841
6842 const char **nb_port_names = xcalloc(nb_port_group->n_ports,
6843 sizeof *nb_port_names);
6844 int i;
6845 for (i = 0; i < nb_port_group->n_ports; i++) {
6846 nb_port_names[i] = nb_port_group->ports[i]->name;
6847 }
6848 sbrec_port_group_set_ports(sb_port_group,
6849 nb_port_names,
6850 nb_port_group->n_ports);
6851 free(nb_port_names);
6852 }
6853
6854 struct shash_node *node, *next;
6855 SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) {
6856 sbrec_port_group_delete(node->data);
6857 shash_delete(&sb_port_groups, node);
6858 }
6859 shash_destroy(&sb_port_groups);
6860 }
6861
/* The fields of one meter band that take part in comparing northbound and
 * southbound meters.  'action' is a borrowed pointer; this struct does not
 * own the string. */
struct band_entry {
    int64_t rate;
    int64_t burst_size;
    const char *action;
};

/* qsort() comparison function for arrays of 'struct band_entry'.
 *
 * Orders entries by 'rate', highest first; ties are broken by 'burst_size',
 * highest first, and then by strcmp() on 'action'.  Returns 0 only when all
 * three fields compare equal. */
static int
band_cmp(const void *band1_, const void *band2_)
{
    const struct band_entry *lhs = band1_;
    const struct band_entry *rhs = band2_;

    if (lhs->rate > rhs->rate) {
        return -1;
    }
    if (lhs->rate < rhs->rate) {
        return 1;
    }

    if (lhs->burst_size > rhs->burst_size) {
        return -1;
    }
    if (lhs->burst_size < rhs->burst_size) {
        return 1;
    }

    return strcmp(lhs->action, rhs->action);
}
6882
6883 static bool
6884 bands_need_update(const struct nbrec_meter *nb_meter,
6885 const struct sbrec_meter *sb_meter)
6886 {
6887 if (nb_meter->n_bands != sb_meter->n_bands) {
6888 return true;
6889 }
6890
6891 /* A single band is the most common scenario, so speed up that
6892 * check. */
6893 if (nb_meter->n_bands == 1) {
6894 struct nbrec_meter_band *nb_band = nb_meter->bands[0];
6895 struct sbrec_meter_band *sb_band = sb_meter->bands[0];
6896
6897 return !(nb_band->rate == sb_band->rate
6898 && nb_band->burst_size == sb_band->burst_size
6899 && !strcmp(sb_band->action, nb_band->action));
6900 }
6901
6902 /* Place the Northbound entries in sorted order. */
6903 struct band_entry *nb_bands;
6904 nb_bands = xmalloc(sizeof *nb_bands * nb_meter->n_bands);
6905 for (size_t i = 0; i < nb_meter->n_bands; i++) {
6906 struct nbrec_meter_band *nb_band = nb_meter->bands[i];
6907
6908 nb_bands[i].rate = nb_band->rate;
6909 nb_bands[i].burst_size = nb_band->burst_size;
6910 nb_bands[i].action = nb_band->action;
6911 }
6912 qsort(nb_bands, nb_meter->n_bands, sizeof *nb_bands, band_cmp);
6913
6914 /* Place the Southbound entries in sorted order. */
6915 struct band_entry *sb_bands;
6916 sb_bands = xmalloc(sizeof *sb_bands * sb_meter->n_bands);
6917 for (size_t i = 0; i < sb_meter->n_bands; i++) {
6918 struct sbrec_meter_band *sb_band = sb_meter->bands[i];
6919
6920 sb_bands[i].rate = sb_band->rate;
6921 sb_bands[i].burst_size = sb_band->burst_size;
6922 sb_bands[i].action = sb_band->action;
6923 }
6924 qsort(sb_bands, sb_meter->n_bands, sizeof *sb_bands, band_cmp);
6925
6926 bool need_update = false;
6927 for (size_t i = 0; i < nb_meter->n_bands; i++) {
6928 if (nb_bands[i].rate != sb_bands[i].rate
6929 || nb_bands[i].burst_size != sb_bands[i].burst_size
6930 || strcmp(nb_bands[i].action, nb_bands[i].action)) {
6931 need_update = true;
6932 goto done;
6933 }
6934 }
6935
6936 done:
6937 free(nb_bands);
6938 free(sb_bands);
6939
6940 return need_update;
6941 }
6942
6943 /* Each entry in the Meter and Meter_Band tables in OVN_Northbound have
6944 * a corresponding entries in the Meter and Meter_Band tables in
6945 * OVN_Southbound.
6946 */
6947 static void
6948 sync_meters(struct northd_context *ctx)
6949 {
6950 struct shash sb_meters = SHASH_INITIALIZER(&sb_meters);
6951
6952 const struct sbrec_meter *sb_meter;
6953 SBREC_METER_FOR_EACH (sb_meter, ctx->ovnsb_idl) {
6954 shash_add(&sb_meters, sb_meter->name, sb_meter);
6955 }
6956
6957 const struct nbrec_meter *nb_meter;
6958 NBREC_METER_FOR_EACH (nb_meter, ctx->ovnnb_idl) {
6959 bool new_sb_meter = false;
6960
6961 sb_meter = shash_find_and_delete(&sb_meters, nb_meter->name);
6962 if (!sb_meter) {
6963 sb_meter = sbrec_meter_insert(ctx->ovnsb_txn);
6964 sbrec_meter_set_name(sb_meter, nb_meter->name);
6965 new_sb_meter = true;
6966 }
6967
6968 if (new_sb_meter || bands_need_update(nb_meter, sb_meter)) {
6969 struct sbrec_meter_band **sb_bands;
6970 sb_bands = xcalloc(nb_meter->n_bands, sizeof *sb_bands);
6971 for (size_t i = 0; i < nb_meter->n_bands; i++) {
6972 const struct nbrec_meter_band *nb_band = nb_meter->bands[i];
6973
6974 sb_bands[i] = sbrec_meter_band_insert(ctx->ovnsb_txn);
6975
6976 sbrec_meter_band_set_action(sb_bands[i], nb_band->action);
6977 sbrec_meter_band_set_rate(sb_bands[i], nb_band->rate);
6978 sbrec_meter_band_set_burst_size(sb_bands[i],
6979 nb_band->burst_size);
6980 }
6981 sbrec_meter_set_bands(sb_meter, sb_bands, nb_meter->n_bands);
6982 free(sb_bands);
6983 }
6984
6985 sbrec_meter_set_unit(sb_meter, nb_meter->unit);
6986 }
6987
6988 struct shash_node *node, *next;
6989 SHASH_FOR_EACH_SAFE (node, next, &sb_meters) {
6990 sbrec_meter_delete(node->data);
6991 shash_delete(&sb_meters, node);
6992 }
6993 shash_destroy(&sb_meters);
6994 }
6995
/*
 * struct 'dns_info' is used to sync the DNS records between the OVN
 * Northbound db and the OVN Southbound db.  One instance exists per
 * northbound DNS row and is keyed in an hmap by that row's UUID.
 */
struct dns_info {
    struct hmap_node hmap_node;     /* Node in the by-UUID hmap. */
    const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns; /* DNS record in the Southbound db. */

    /* Southbound datapaths with which the DNS entry is associated.  'sbs'
     * is a heap-allocated array holding 'n_sbs' elements. */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;
};
7009
7010 static inline struct dns_info *
7011 get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
7012 {
7013 struct dns_info *dns_info;
7014 size_t hash = uuid_hash(uuid);
7015 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
7016 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
7017 return dns_info;
7018 }
7019 }
7020
7021 return NULL;
7022 }
7023
7024 static void
7025 sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
7026 {
7027 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
7028 struct ovn_datapath *od;
7029 HMAP_FOR_EACH (od, key_node, datapaths) {
7030 if (!od->nbs || !od->nbs->n_dns_records) {
7031 continue;
7032 }
7033
7034 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
7035 struct dns_info *dns_info = get_dns_info_from_hmap(
7036 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
7037 if (!dns_info) {
7038 size_t hash = uuid_hash(
7039 &od->nbs->dns_records[i]->header_.uuid);
7040 dns_info = xzalloc(sizeof *dns_info);;
7041 dns_info->nb_dns = od->nbs->dns_records[i];
7042 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
7043 }
7044
7045 dns_info->n_sbs++;
7046 dns_info->sbs = xrealloc(dns_info->sbs,
7047 dns_info->n_sbs * sizeof *dns_info->sbs);
7048 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
7049 }
7050 }
7051
7052 const struct sbrec_dns *sbrec_dns, *next;
7053 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
7054 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
7055 struct uuid dns_uuid;
7056 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
7057 sbrec_dns_delete(sbrec_dns);
7058 continue;
7059 }
7060
7061 struct dns_info *dns_info =
7062 get_dns_info_from_hmap(&dns_map, &dns_uuid);
7063 if (dns_info) {
7064 dns_info->sb_dns = sbrec_dns;
7065 } else {
7066 sbrec_dns_delete(sbrec_dns);
7067 }
7068 }
7069
7070 struct dns_info *dns_info;
7071 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
7072 if (!dns_info->sb_dns) {
7073 sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
7074 dns_info->sb_dns = sbrec_dns;
7075 char *dns_id = xasprintf(
7076 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
7077 const struct smap external_ids =
7078 SMAP_CONST1(&external_ids, "dns_id", dns_id);
7079 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
7080 free(dns_id);
7081 }
7082
7083 /* Set the datapaths and records. If nothing has changed, then
7084 * this will be a no-op.
7085 */
7086 sbrec_dns_set_datapaths(
7087 dns_info->sb_dns,
7088 (struct sbrec_datapath_binding **)dns_info->sbs,
7089 dns_info->n_sbs);
7090 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
7091 free(dns_info->sbs);
7092 free(dns_info);
7093 }
7094 hmap_destroy(&dns_map);
7095 }
7096
7097
7098 \f
7099 static void
7100 ovnnb_db_run(struct northd_context *ctx,
7101 struct ovsdb_idl_index *sbrec_chassis_by_name,
7102 struct ovsdb_idl_loop *sb_loop)
7103 {
7104 if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
7105 return;
7106 }
7107 struct hmap datapaths, ports, port_groups;
7108 build_datapaths(ctx, &datapaths);
7109 build_ports(ctx, sbrec_chassis_by_name, &datapaths, &ports);
7110 build_ipam(&datapaths, &ports);
7111 build_port_group_lswitches(ctx, &port_groups, &ports);
7112 build_lflows(ctx, &datapaths, &ports, &port_groups);
7113
7114 sync_address_sets(ctx);
7115 sync_port_groups(ctx);
7116 sync_meters(ctx);
7117 sync_dns_entries(ctx, &datapaths);
7118
7119 struct ovn_port_group *pg, *next_pg;
7120 HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
7121 ovn_port_group_destroy(&port_groups, pg);
7122 }
7123 hmap_destroy(&port_groups);
7124
7125 struct ovn_datapath *dp, *next_dp;
7126 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
7127 ovn_datapath_destroy(&datapaths, dp);
7128 }
7129 hmap_destroy(&datapaths);
7130
7131 struct ovn_port *port, *next_port;
7132 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
7133 ovn_port_destroy(&ports, port);
7134 }
7135 hmap_destroy(&ports);
7136
7137 /* Copy nb_cfg from northbound to southbound database.
7138 *
7139 * Also set up to update sb_cfg once our southbound transaction commits. */
7140 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
7141 if (!nb) {
7142 nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
7143 }
7144 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
7145 if (!sb) {
7146 sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
7147 }
7148 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
7149 sbrec_sb_global_set_options(sb, &nb->options);
7150 sb_loop->next_cfg = nb->nb_cfg;
7151
7152 cleanup_macam(&macam);
7153 }
7154
7155 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
7156 * this column is not empty, it means we need to set the corresponding logical
7157 * port as 'up' in the northbound DB. */
7158 static void
7159 update_logical_port_status(struct northd_context *ctx)
7160 {
7161 struct hmap lports_hmap;
7162 const struct sbrec_port_binding *sb;
7163 const struct nbrec_logical_switch_port *nbsp;
7164
7165 struct lport_hash_node {
7166 struct hmap_node node;
7167 const struct nbrec_logical_switch_port *nbsp;
7168 } *hash_node;
7169
7170 hmap_init(&lports_hmap);
7171
7172 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
7173 hash_node = xzalloc(sizeof *hash_node);
7174 hash_node->nbsp = nbsp;
7175 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
7176 }
7177
7178 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
7179 nbsp = NULL;
7180 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
7181 hash_string(sb->logical_port, 0),
7182 &lports_hmap) {
7183 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
7184 nbsp = hash_node->nbsp;
7185 break;
7186 }
7187 }
7188
7189 if (!nbsp) {
7190 /* The logical port doesn't exist for this port binding. This can
7191 * happen under normal circumstances when ovn-northd hasn't gotten
7192 * around to pruning the Port_Binding yet. */
7193 continue;
7194 }
7195
7196 bool up = (sb->chassis || !strcmp(nbsp->type, "router"));
7197 if (!nbsp->up || *nbsp->up != up) {
7198 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
7199 }
7200 }
7201
7202 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
7203 free(hash_node);
7204 }
7205 hmap_destroy(&lports_hmap);
7206 }
7207
/* The DHCPv4 options that check_and_add_supported_dhcp_opts_to_sb_db()
 * makes sure are present in the southbound DHCP_Options table.
 *
 * Each initializer is a macro defined outside this file section (presumably
 * in "ovn/lib/ovn-l7.h" -- confirm).  The array is not 'const' because the
 * 'hmap_node' embedded in each element is linked into a temporary hmap by
 * that function. */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2,
    DHCP_OPT_WPAD,
};
7235
/* The DHCPv6 options that check_and_add_supported_dhcpv6_opts_to_sb_db()
 * makes sure are present in the southbound DHCPv6_Options table.  Not
 * 'const' because each element's embedded 'hmap_node' is linked into a
 * temporary hmap by that function. */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
7242
7243 static void
7244 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
7245 {
7246 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
7247 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
7248 sizeof(supported_dhcp_opts[0])); i++) {
7249 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
7250 dhcp_opt_hash(supported_dhcp_opts[i].name));
7251 }
7252
7253 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
7254 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
7255 struct gen_opts_map *dhcp_opt =
7256 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
7257 if (dhcp_opt) {
7258 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
7259 } else {
7260 sbrec_dhcp_options_delete(opt_row);
7261 }
7262 }
7263
7264 struct gen_opts_map *opt;
7265 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
7266 struct sbrec_dhcp_options *sbrec_dhcp_option =
7267 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
7268 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
7269 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
7270 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
7271 }
7272
7273 hmap_destroy(&dhcp_opts_to_add);
7274 }
7275
7276 static void
7277 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
7278 {
7279 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
7280 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
7281 sizeof(supported_dhcpv6_opts[0])); i++) {
7282 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
7283 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
7284 }
7285
7286 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
7287 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
7288 struct gen_opts_map *dhcp_opt =
7289 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
7290 if (dhcp_opt) {
7291 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
7292 } else {
7293 sbrec_dhcpv6_options_delete(opt_row);
7294 }
7295 }
7296
7297 struct gen_opts_map *opt;
7298 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
7299 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
7300 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
7301 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
7302 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
7303 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
7304 }
7305
7306 hmap_destroy(&dhcpv6_opts_to_add);
7307 }
7308
/* Column lists for the RBAC permissions that ovn-northd installs in the
 * southbound database, one pair per table.  The '*_auth' array becomes the
 * permission row's 'authorization' column and the '*_update' array becomes
 * its 'update' column (see ovn_rbac_create_perm()).
 *
 * NOTE(review): the single empty string used for Port_Binding and
 * MAC_Binding authorization presumably means "no per-client authorization
 * check" -- confirm against the RBAC_Permission documentation in
 * ovn-sb(5). */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};
7328
/* The RBAC permissions that must exist in the southbound database, one
 * entry per table.  The array is terminated by an entry whose 'table' is
 * NULL; code iterating over it stops there. */
static struct rbac_perm_cfg {
    const char *table;      /* Name of the table the permission covers. */
    const char **auth;      /* Expected 'authorization' column contents. */
    int n_auth;             /* Number of elements in 'auth'. */
    bool insdel;            /* Expected 'insert_delete' column value. */
    const char **update;    /* Expected 'update' column contents. */
    int n_update;           /* Number of elements in 'update'. */

    /* Southbound row found to match this entry, or NULL if none.  Reset by
     * check_and_update_rbac() and set by ovn_rbac_validate_perm(). */
    const struct sbrec_rbac_permission *row;
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        /* Sentinel: marks the end of the array. */
        .table = NULL,
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
7380
7381 static bool
7382 ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
7383 {
7384 struct rbac_perm_cfg *pcfg;
7385 int i, j, n_found;
7386
7387 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7388 if (!strcmp(perm->table, pcfg->table)) {
7389 break;
7390 }
7391 }
7392 if (!pcfg->table) {
7393 return false;
7394 }
7395 if (perm->n_authorization != pcfg->n_auth ||
7396 perm->n_update != pcfg->n_update) {
7397 return false;
7398 }
7399 if (perm->insert_delete != pcfg->insdel) {
7400 return false;
7401 }
7402 /* verify perm->authorization vs. pcfg->auth */
7403 n_found = 0;
7404 for (i = 0; i < pcfg->n_auth; i++) {
7405 for (j = 0; j < perm->n_authorization; j++) {
7406 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
7407 n_found++;
7408 break;
7409 }
7410 }
7411 }
7412 if (n_found != pcfg->n_auth) {
7413 return false;
7414 }
7415
7416 /* verify perm->update vs. pcfg->update */
7417 n_found = 0;
7418 for (i = 0; i < pcfg->n_update; i++) {
7419 for (j = 0; j < perm->n_update; j++) {
7420 if (!strcmp(pcfg->update[i], perm->update[j])) {
7421 n_found++;
7422 break;
7423 }
7424 }
7425 }
7426 if (n_found != pcfg->n_update) {
7427 return false;
7428 }
7429
7430 /* Success, db state matches expected state */
7431 pcfg->row = perm;
7432 return true;
7433 }
7434
7435 static void
7436 ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
7437 struct northd_context *ctx,
7438 const struct sbrec_rbac_role *rbac_role)
7439 {
7440 struct sbrec_rbac_permission *rbac_perm;
7441
7442 rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
7443 sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
7444 sbrec_rbac_permission_set_authorization(rbac_perm,
7445 pcfg->auth,
7446 pcfg->n_auth);
7447 sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
7448 sbrec_rbac_permission_set_update(rbac_perm,
7449 pcfg->update,
7450 pcfg->n_update);
7451 sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
7452 rbac_perm);
7453 }
7454
7455 static void
7456 check_and_update_rbac(struct northd_context *ctx)
7457 {
7458 const struct sbrec_rbac_role *rbac_role = NULL;
7459 const struct sbrec_rbac_permission *perm_row, *perm_next;
7460 const struct sbrec_rbac_role *role_row, *role_row_next;
7461 struct rbac_perm_cfg *pcfg;
7462
7463 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7464 pcfg->row = NULL;
7465 }
7466
7467 SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
7468 if (!ovn_rbac_validate_perm(perm_row)) {
7469 sbrec_rbac_permission_delete(perm_row);
7470 }
7471 }
7472 SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
7473 if (strcmp(role_row->name, "ovn-controller")) {
7474 sbrec_rbac_role_delete(role_row);
7475 } else {
7476 rbac_role = role_row;
7477 }
7478 }
7479
7480 if (!rbac_role) {
7481 rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
7482 sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
7483 }
7484
7485 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
7486 if (!pcfg->row) {
7487 ovn_rbac_create_perm(pcfg, ctx, rbac_role);
7488 }
7489 }
7490 }
7491
7492 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
7493 static void
7494 update_northbound_cfg(struct northd_context *ctx,
7495 struct ovsdb_idl_loop *sb_loop)
7496 {
7497 /* Update northbound sb_cfg if appropriate. */
7498 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
7499 int64_t sb_cfg = sb_loop->cur_cfg;
7500 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
7501 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
7502 }
7503
7504 /* Update northbound hv_cfg if appropriate. */
7505 if (nbg) {
7506 /* Find minimum nb_cfg among all chassis. */
7507 const struct sbrec_chassis *chassis;
7508 int64_t hv_cfg = nbg->nb_cfg;
7509 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
7510 if (chassis->nb_cfg < hv_cfg) {
7511 hv_cfg = chassis->nb_cfg;
7512 }
7513 }
7514
7515 /* Update hv_cfg. */
7516 if (nbg->hv_cfg != hv_cfg) {
7517 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
7518 }
7519 }
7520 }
7521
7522 /* Handle a fairly small set of changes in the southbound database. */
7523 static void
7524 ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
7525 {
7526 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
7527 return;
7528 }
7529
7530 update_logical_port_status(ctx);
7531 update_northbound_cfg(ctx, sb_loop);
7532 }
7533 \f
7534 static void
7535 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
7536 {
7537 enum {
7538 DAEMON_OPTION_ENUMS,
7539 VLOG_OPTION_ENUMS,
7540 SSL_OPTION_ENUMS,
7541 };
7542 static const struct option long_options[] = {
7543 {"ovnsb-db", required_argument, NULL, 'd'},
7544 {"ovnnb-db", required_argument, NULL, 'D'},
7545 {"unixctl", required_argument, NULL, 'u'},
7546 {"help", no_argument, NULL, 'h'},
7547 {"options", no_argument, NULL, 'o'},
7548 {"version", no_argument, NULL, 'V'},
7549 DAEMON_LONG_OPTIONS,
7550 VLOG_LONG_OPTIONS,
7551 STREAM_SSL_LONG_OPTIONS,
7552 {NULL, 0, NULL, 0},
7553 };
7554 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
7555
7556 for (;;) {
7557 int c;
7558
7559 c = getopt_long(argc, argv, short_options, long_options, NULL);
7560 if (c == -1) {
7561 break;
7562 }
7563
7564 switch (c) {
7565 DAEMON_OPTION_HANDLERS;
7566 VLOG_OPTION_HANDLERS;
7567 STREAM_SSL_OPTION_HANDLERS;
7568
7569 case 'd':
7570 ovnsb_db = optarg;
7571 break;
7572
7573 case 'D':
7574 ovnnb_db = optarg;
7575 break;
7576
7577 case 'u':
7578 unixctl_path = optarg;
7579 break;
7580
7581 case 'h':
7582 usage();
7583 exit(EXIT_SUCCESS);
7584
7585 case 'o':
7586 ovs_cmdl_print_options(long_options);
7587 exit(EXIT_SUCCESS);
7588
7589 case 'V':
7590 ovs_print_version(0, 0);
7591 exit(EXIT_SUCCESS);
7592
7593 default:
7594 break;
7595 }
7596 }
7597
7598 if (!ovnsb_db) {
7599 ovnsb_db = default_sb_db();
7600 }
7601
7602 if (!ovnnb_db) {
7603 ovnnb_db = default_nb_db();
7604 }
7605
7606 free(short_options);
7607 }
7608
/* Convenience wrapper: calls ovsdb_idl_add_column() and then
 * ovsdb_idl_omit_alert() for 'column' on 'idl'.
 *
 * NOTE(review): per the OVSDB IDL API this presumably replicates the column
 * without waking the main loop when only that column changes -- confirm
 * against the ovsdb-idl documentation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
7616
7617 int
7618 main(int argc, char *argv[])
7619 {
7620 int res = EXIT_SUCCESS;
7621 struct unixctl_server *unixctl;
7622 int retval;
7623 bool exiting;
7624
7625 fatal_ignore_sigpipe();
7626 ovs_cmdl_proctitle_init(argc, argv);
7627 set_program_name(argv[0]);
7628 service_start(&argc, &argv);
7629 parse_options(argc, argv);
7630
7631 daemonize_start(false);
7632
7633 retval = unixctl_server_create(unixctl_path, &unixctl);
7634 if (retval) {
7635 exit(EXIT_FAILURE);
7636 }
7637 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
7638
7639 daemonize_complete();
7640
7641 /* We want to detect (almost) all changes to the ovn-nb db. */
7642 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
7643 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
7644 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
7645 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);
7646
7647 /* We want to detect only selected changes to the ovn-sb db. */
7648 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
7649 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
7650
7651 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
7652 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);
7653 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_options);
7654
7655 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
7656 add_column_noalert(ovnsb_idl_loop.idl,
7657 &sbrec_logical_flow_col_logical_datapath);
7658 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
7659 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
7660 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
7661 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
7662 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
7663
7664 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
7665 add_column_noalert(ovnsb_idl_loop.idl,
7666 &sbrec_multicast_group_col_datapath);
7667 add_column_noalert(ovnsb_idl_loop.idl,
7668 &sbrec_multicast_group_col_tunnel_key);
7669 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
7670 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
7671
7672 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
7673 add_column_noalert(ovnsb_idl_loop.idl,
7674 &sbrec_datapath_binding_col_tunnel_key);
7675 add_column_noalert(ovnsb_idl_loop.idl,
7676 &sbrec_datapath_binding_col_external_ids);
7677
7678 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
7679 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
7680 add_column_noalert(ovnsb_idl_loop.idl,
7681 &sbrec_port_binding_col_logical_port);
7682 add_column_noalert(ovnsb_idl_loop.idl,
7683 &sbrec_port_binding_col_tunnel_key);
7684 add_column_noalert(ovnsb_idl_loop.idl,
7685 &sbrec_port_binding_col_parent_port);
7686 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
7687 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
7688 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
7689 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
7690 add_column_noalert(ovnsb_idl_loop.idl,
7691 &sbrec_port_binding_col_nat_addresses);
7692 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
7693 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7694 &sbrec_port_binding_col_gateway_chassis);
7695 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7696 &sbrec_gateway_chassis_col_chassis);
7697 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
7698 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7699 &sbrec_gateway_chassis_col_priority);
7700 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7701 &sbrec_gateway_chassis_col_external_ids);
7702 ovsdb_idl_add_column(ovnsb_idl_loop.idl,
7703 &sbrec_gateway_chassis_col_options);
7704 add_column_noalert(ovnsb_idl_loop.idl,
7705 &sbrec_port_binding_col_external_ids);
7706 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
7707 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
7708 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
7709 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
7710 add_column_noalert(ovnsb_idl_loop.idl,
7711 &sbrec_mac_binding_col_logical_port);
7712 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
7713 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
7714 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
7715 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
7716 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
7717 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
7718 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
7719 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
7720 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
7721 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
7722 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
7723 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_group);
7724 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_name);
7725 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_ports);
7726
7727 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
7728 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
7729 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
7730 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);
7731
7732 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
7733 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
7734 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);
7735
7736 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
7737 add_column_noalert(ovnsb_idl_loop.idl,
7738 &sbrec_rbac_permission_col_table);
7739 add_column_noalert(ovnsb_idl_loop.idl,
7740 &sbrec_rbac_permission_col_authorization);
7741 add_column_noalert(ovnsb_idl_loop.idl,
7742 &sbrec_rbac_permission_col_insert_delete);
7743 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);
7744
7745 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_meter);
7746 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_name);
7747 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_unit);
7748 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_col_bands);
7749
7750 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_meter_band);
7751 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_action);
7752 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_rate);
7753 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_meter_band_col_burst_size);
7754
7755 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
7756 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
7757 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
7758
7759 struct ovsdb_idl_index *sbrec_chassis_by_name
7760 = chassis_index_create(ovnsb_idl_loop.idl);
7761
7762 /* Ensure that only a single ovn-northd is active in the deployment by
7763 * acquiring a lock called "ovn_northd" on the southbound database
7764 * and then only performing DB transactions if the lock is held. */
7765 ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
7766 bool had_lock = false;
7767
7768 /* Main loop. */
7769 exiting = false;
7770 while (!exiting) {
7771 struct northd_context ctx = {
7772 .ovnnb_idl = ovnnb_idl_loop.idl,
7773 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
7774 .ovnsb_idl = ovnsb_idl_loop.idl,
7775 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
7776 };
7777
7778 if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7779 VLOG_INFO("ovn-northd lock acquired. "
7780 "This ovn-northd instance is now active.");
7781 had_lock = true;
7782 } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7783 VLOG_INFO("ovn-northd lock lost. "
7784 "This ovn-northd instance is now on standby.");
7785 had_lock = false;
7786 }
7787
7788 if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
7789 ovnnb_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
7790 ovnsb_db_run(&ctx, &ovnsb_idl_loop);
7791 if (ctx.ovnsb_txn) {
7792 check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
7793 check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
7794 check_and_update_rbac(&ctx);
7795 }
7796 }
7797
7798 unixctl_server_run(unixctl);
7799 unixctl_server_wait(unixctl);
7800 if (exiting) {
7801 poll_immediate_wake();
7802 }
7803 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
7804 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
7805
7806 poll_block();
7807 if (should_service_stop()) {
7808 exiting = true;
7809 }
7810 }
7811
7812 unixctl_server_destroy(unixctl);
7813 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
7814 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
7815 service_stop();
7816
7817 exit(res);
7818 }
7819
7820 static void
7821 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
7822 const char *argv[] OVS_UNUSED, void *exiting_)
7823 {
7824 bool *exiting = exiting_;
7825 *exiting = true;
7826
7827 unixctl_command_reply(conn, NULL);
7828 }