]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/northd/ovn-northd.c
ovn: add rbac tables to ovn southbound schema
[mirror_ovs.git] / ovn / northd / ovn-northd.c
1 /*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "bitmap.h"
22 #include "command-line.h"
23 #include "daemon.h"
24 #include "dirs.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
27 #include "hash.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
30 #include "ovn/lex.h"
31 #include "ovn/lib/logical-fields.h"
32 #include "ovn/lib/ovn-dhcp.h"
33 #include "ovn/lib/ovn-nb-idl.h"
34 #include "ovn/lib/ovn-sb-idl.h"
35 #include "ovn/lib/ovn-util.h"
36 #include "ovn/actions.h"
37 #include "packets.h"
38 #include "poll-loop.h"
39 #include "smap.h"
40 #include "sset.h"
41 #include "stream.h"
42 #include "stream-ssl.h"
43 #include "unixctl.h"
44 #include "util.h"
45 #include "uuid.h"
46 #include "openvswitch/vlog.h"
47
48 VLOG_DEFINE_THIS_MODULE(ovn_northd);
49
50 static unixctl_cb_func ovn_northd_exit;
51
/* Database handles passed around by the functions in this file: an IDL
 * connection and a transaction for each of the northbound and southbound
 * databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database IDL. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database IDL. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Southbound transaction. */
};
58
59 static const char *ovnnb_db;
60 static const char *ovnsb_db;
61
62 #define MAC_ADDR_PREFIX 0x0A0000000000ULL
63 #define MAC_ADDR_SPACE 0xffffff
64
65 /* MAC address management (macam) table of "struct eth_addr"s, that holds the
66 * MAC addresses allocated by the OVN ipam module. */
67 static struct hmap macam = HMAP_INITIALIZER(&macam);
68
69 #define MAX_OVN_TAGS 4096
70 \f
71 /* Pipeline stages. */
72
/* An OVN logical flow table belongs to one of two pipelines.  The numeric
 * values matter: they are packed into bit 8 of an "enum ovn_stage". */
enum ovn_pipeline {
    P_IN = 0,                   /* Ingress pipeline. */
    P_OUT = 1                   /* Egress pipeline. */
};
78
/* The kinds of OVN logical datapath that ovn-northd works with.  The numeric
 * values matter: they are packed into bit 9 of an "enum ovn_stage". */
enum ovn_datapath_type {
    DP_SWITCH = 0,              /* OVN logical switch. */
    DP_ROUTER = 1               /* OVN logical router. */
};
84
/* Packs a datapath type, a pipeline, and a table number into the integer
 * value of an "enum ovn_stage":
 *
 *     bit 9:     DP_TYPE
 *     bit 8:     PIPELINE
 *     bits 0-7:  TABLE
 *
 * (ovn_stage_build() is the type-safe equivalent, but an inline function
 * cannot appear in an enumerator or a case label, so the macro is needed
 * there.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    ((TABLE) | ((PIPELINE) << 8) | ((DP_TYPE) << 9))
91
92 /* A stage within an OVN logical switch or router.
93 *
94 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
95 * or router, whether the stage is part of the ingress or egress pipeline, and
96 * the table within that pipeline. The first three components are combined to
97 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
98 * S_ROUTER_OUT_DELIVERY. */
99 enum ovn_stage {
100 #define PIPELINE_STAGES \
101 /* Logical switch ingress stages. */ \
102 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
103 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
104 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
105 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
106 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
107 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
108 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
109 PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
110 PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \
111 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \
112 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \
113 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \
114 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
115 PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 13, "ls_in_dns_lookup") \
116 PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 14, "ls_in_dns_response") \
117 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 15, "ls_in_l2_lkup") \
118 \
119 /* Logical switch egress stages. */ \
120 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
121 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
122 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
123 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
124 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
125 PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
126 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \
127 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \
128 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \
129 \
130 /* Logical router ingress stages. */ \
131 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
132 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
133 PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
134 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
135 PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
136 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 5, "lr_in_ip_routing") \
137 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 6, "lr_in_arp_resolve") \
138 PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 7, "lr_in_gw_redirect") \
139 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 8, "lr_in_arp_request") \
140 \
141 /* Logical router egress stages. */ \
142 PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
143 PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \
144 PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \
145 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery")
146
147 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
148 S_##DP_TYPE##_##PIPELINE##_##STAGE \
149 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
150 PIPELINE_STAGES
151 #undef PIPELINE_STAGE
152 };
153
/* Because various hard-coded priorities are needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than is
 * available to logical flows.  This value is added to an ACL priority to
 * obtain the priority of the ACL's logical flow. */
#define OVN_ACL_PRI_OFFSET 1000

/* Register bits used only in logical switch pipelines. */
#define REGBIT_CONNTRACK_DEFRAG  "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT  "reg0[1]"
#define REGBIT_CONNTRACK_NAT     "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT  "reg0[3]"
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"

/* Register bits used in both logical switch and logical router pipelines. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Marks a packet that has been recirculated using egress loopback.  This
 * lets certain checks be bypassed, such as a logical router dropping
 * packets whose source IP address is one of the router's own addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
174
175 /* Returns an "enum ovn_stage" built from the arguments. */
176 static enum ovn_stage
177 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
178 uint8_t table)
179 {
180 return OVN_STAGE_BUILD(dp_type, pipeline, table);
181 }
182
183 /* Returns the pipeline to which 'stage' belongs. */
184 static enum ovn_pipeline
185 ovn_stage_get_pipeline(enum ovn_stage stage)
186 {
187 return (stage >> 8) & 1;
188 }
189
190 /* Returns the table to which 'stage' belongs. */
191 static uint8_t
192 ovn_stage_get_table(enum ovn_stage stage)
193 {
194 return stage & 0xff;
195 }
196
197 /* Returns a string name for 'stage'. */
198 static const char *
199 ovn_stage_to_str(enum ovn_stage stage)
200 {
201 switch (stage) {
202 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
203 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
204 PIPELINE_STAGES
205 #undef PIPELINE_STAGE
206 default: return "<unknown>";
207 }
208 }
209
210 /* Returns the type of the datapath to which a flow with the given 'stage' may
211 * be added. */
212 static enum ovn_datapath_type
213 ovn_stage_to_datapath_type(enum ovn_stage stage)
214 {
215 switch (stage) {
216 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
217 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
218 PIPELINE_STAGES
219 #undef PIPELINE_STAGE
220 default: OVS_NOT_REACHED();
221 }
222 }
223 \f
224 static void
225 usage(void)
226 {
227 printf("\
228 %s: OVN northbound management daemon\n\
229 usage: %s [OPTIONS]\n\
230 \n\
231 Options:\n\
232 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
233 (default: %s)\n\
234 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
235 (default: %s)\n\
236 -h, --help display this help message\n\
237 -o, --options list available options\n\
238 -V, --version display version information\n\
239 ", program_name, program_name, default_nb_db(), default_sb_db());
240 daemon_usage();
241 vlog_usage();
242 stream_usage("database", true, true, false);
243 }
244 \f
245 struct tnlid_node {
246 struct hmap_node hmap_node;
247 uint32_t tnlid;
248 };
249
250 static void
251 destroy_tnlids(struct hmap *tnlids)
252 {
253 struct tnlid_node *node;
254 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
255 free(node);
256 }
257 hmap_destroy(tnlids);
258 }
259
260 static void
261 add_tnlid(struct hmap *set, uint32_t tnlid)
262 {
263 struct tnlid_node *node = xmalloc(sizeof *node);
264 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
265 node->tnlid = tnlid;
266 }
267
268 static bool
269 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
270 {
271 const struct tnlid_node *node;
272 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
273 if (node->tnlid == tnlid) {
274 return true;
275 }
276 }
277 return false;
278 }
279
280 static uint32_t
281 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
282 uint32_t *hint)
283 {
284 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
285 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
286 if (!tnlid_in_use(set, tnlid)) {
287 add_tnlid(set, tnlid);
288 *hint = tnlid;
289 return tnlid;
290 }
291 }
292
293 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
294 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
295 return 0;
296 }
297 \f
298 struct ovn_chassis_qdisc_queues {
299 struct hmap_node key_node;
300 uint32_t queue_id;
301 struct uuid chassis_uuid;
302 };
303
304 static void
305 destroy_chassis_queues(struct hmap *set)
306 {
307 struct ovn_chassis_qdisc_queues *node;
308 HMAP_FOR_EACH_POP (node, key_node, set) {
309 free(node);
310 }
311 hmap_destroy(set);
312 }
313
314 static void
315 add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
316 uint32_t queue_id)
317 {
318 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
319 node->queue_id = queue_id;
320 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
321 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
322 }
323
324 static bool
325 chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
326 uint32_t queue_id)
327 {
328 const struct ovn_chassis_qdisc_queues *node;
329 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
330 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
331 && node->queue_id == queue_id) {
332 return true;
333 }
334 }
335 return false;
336 }
337
338 static uint32_t
339 allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
340 {
341 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
342 queue_id <= QDISC_MAX_QUEUE_ID;
343 queue_id++) {
344 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
345 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
346 return queue_id;
347 }
348 }
349
350 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
351 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
352 return 0;
353 }
354
355 static void
356 free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
357 uint32_t queue_id)
358 {
359 struct ovn_chassis_qdisc_queues *node;
360 HMAP_FOR_EACH_WITH_HASH (node, key_node,
361 uuid_hash(&chassis->header_.uuid),
362 set) {
363 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
364 && node->queue_id == queue_id) {
365 hmap_remove(set, &node->key_node);
366 break;
367 }
368 }
369 }
370
/* Returns true if 'opts' contains a "qos_max_rate" or a "qos_burst" key. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    return smap_get(opts, "qos_max_rate") != NULL
           || smap_get(opts, "qos_burst") != NULL;
}
377 \f
378
/* Per-logical-switch IP address management state, filled in by
 * init_ipam_info_for_datapath(). */
struct ipam_info {
    uint32_t start_ipv4;        /* First managed IPv4 address, host order. */
    size_t total_ipv4s;         /* Number of bits in 'allocated_ipv4s'. */
    unsigned long *allocated_ipv4s; /* Bitmap of allocated IPv4s; bit N is
                                     * address 'start_ipv4' + N.  NULL when
                                     * no valid subnet is configured. */
    bool ipv6_prefix_set;       /* True if 'ipv6_prefix' was parsed. */
    struct in6_addr ipv6_prefix; /* Prefix for generated IPv6 addresses. */
};
386
387 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
388 * sb->external_ids:logical-switch. */
389 struct ovn_datapath {
390 struct hmap_node key_node; /* Index on 'key'. */
391 struct uuid key; /* (nbs/nbr)->header_.uuid. */
392
393 const struct nbrec_logical_switch *nbs; /* May be NULL. */
394 const struct nbrec_logical_router *nbr; /* May be NULL. */
395 const struct sbrec_datapath_binding *sb; /* May be NULL. */
396
397 struct ovs_list list; /* In list of similar records. */
398
399 /* Logical switch data. */
400 struct ovn_port **router_ports;
401 size_t n_router_ports;
402
403 struct hmap port_tnlids;
404 uint32_t port_key_hint;
405
406 bool has_unknown;
407
408 /* IPAM data. */
409 struct ipam_info *ipam_info;
410
411 /* OVN northd only needs to know about the logical router gateway port for
412 * NAT on a distributed router. This "distributed gateway port" is
413 * populated only when there is a "redirect-chassis" specified for one of
414 * the ports on the logical router. Otherwise this will be NULL. */
415 struct ovn_port *l3dgw_port;
416 /* The "derived" OVN port representing the instance of l3dgw_port on
417 * the "redirect-chassis". */
418 struct ovn_port *l3redirect_port;
419 };
420
421 struct macam_node {
422 struct hmap_node hmap_node;
423 struct eth_addr mac_addr; /* Allocated MAC address. */
424 };
425
426 static void
427 cleanup_macam(struct hmap *macam)
428 {
429 struct macam_node *node;
430 HMAP_FOR_EACH_POP (node, hmap_node, macam) {
431 free(node);
432 }
433 }
434
435 static struct ovn_datapath *
436 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
437 const struct nbrec_logical_switch *nbs,
438 const struct nbrec_logical_router *nbr,
439 const struct sbrec_datapath_binding *sb)
440 {
441 struct ovn_datapath *od = xzalloc(sizeof *od);
442 od->key = *key;
443 od->sb = sb;
444 od->nbs = nbs;
445 od->nbr = nbr;
446 hmap_init(&od->port_tnlids);
447 od->port_key_hint = 0;
448 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
449 return od;
450 }
451
452 static void
453 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
454 {
455 if (od) {
456 /* Don't remove od->list. It is used within build_datapaths() as a
457 * private list and once we've exited that function it is not safe to
458 * use it. */
459 hmap_remove(datapaths, &od->key_node);
460 destroy_tnlids(&od->port_tnlids);
461 if (od->ipam_info) {
462 bitmap_free(od->ipam_info->allocated_ipv4s);
463 free(od->ipam_info);
464 }
465 free(od->router_ports);
466 free(od);
467 }
468 }
469
470 /* Returns 'od''s datapath type. */
471 static enum ovn_datapath_type
472 ovn_datapath_get_type(const struct ovn_datapath *od)
473 {
474 return od->nbs ? DP_SWITCH : DP_ROUTER;
475 }
476
477 static struct ovn_datapath *
478 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
479 {
480 struct ovn_datapath *od;
481
482 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
483 if (uuid_equals(uuid, &od->key)) {
484 return od;
485 }
486 }
487 return NULL;
488 }
489
490 static struct ovn_datapath *
491 ovn_datapath_from_sbrec(struct hmap *datapaths,
492 const struct sbrec_datapath_binding *sb)
493 {
494 struct uuid key;
495
496 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
497 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
498 return NULL;
499 }
500 return ovn_datapath_find(datapaths, &key);
501 }
502
503 static bool
504 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
505 {
506 return !lrouter->enabled || *lrouter->enabled;
507 }
508
509 static void
510 init_ipam_info_for_datapath(struct ovn_datapath *od)
511 {
512 if (!od->nbs) {
513 return;
514 }
515
516 const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
517 const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");
518
519 if (ipv6_prefix) {
520 od->ipam_info = xzalloc(sizeof *od->ipam_info);
521 od->ipam_info->ipv6_prefix_set = ipv6_parse(
522 ipv6_prefix, &od->ipam_info->ipv6_prefix);
523 }
524
525 if (!subnet_str) {
526 return;
527 }
528
529 ovs_be32 subnet, mask;
530 char *error = ip_parse_masked(subnet_str, &subnet, &mask);
531 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
532 static struct vlog_rate_limit rl
533 = VLOG_RATE_LIMIT_INIT(5, 1);
534 VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
535 free(error);
536 return;
537 }
538
539 if (!od->ipam_info) {
540 od->ipam_info = xzalloc(sizeof *od->ipam_info);
541 }
542 od->ipam_info->start_ipv4 = ntohl(subnet) + 1;
543 od->ipam_info->total_ipv4s = ~ntohl(mask);
544 od->ipam_info->allocated_ipv4s =
545 bitmap_allocate(od->ipam_info->total_ipv4s);
546
547 /* Mark first IP as taken */
548 bitmap_set1(od->ipam_info->allocated_ipv4s, 0);
549
550 /* Check if there are any reserver IPs (list) to be excluded from IPAM */
551 const char *exclude_ip_list = smap_get(&od->nbs->other_config,
552 "exclude_ips");
553 if (!exclude_ip_list) {
554 return;
555 }
556
557 struct lexer lexer;
558 lexer_init(&lexer, exclude_ip_list);
559 /* exclude_ip_list could be in the format -
560 * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
561 */
562 lexer_get(&lexer);
563 while (lexer.token.type != LEX_T_END) {
564 if (lexer.token.type != LEX_T_INTEGER) {
565 lexer_syntax_error(&lexer, "expecting address");
566 break;
567 }
568 uint32_t start = ntohl(lexer.token.value.ipv4);
569 lexer_get(&lexer);
570
571 uint32_t end = start + 1;
572 if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
573 if (lexer.token.type != LEX_T_INTEGER) {
574 lexer_syntax_error(&lexer, "expecting address range");
575 break;
576 }
577 end = ntohl(lexer.token.value.ipv4) + 1;
578 lexer_get(&lexer);
579 }
580
581 /* Clamp start...end to fit the subnet. */
582 start = MAX(od->ipam_info->start_ipv4, start);
583 end = MIN(od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s, end);
584 if (end > start) {
585 bitmap_set_multiple(od->ipam_info->allocated_ipv4s,
586 start - od->ipam_info->start_ipv4,
587 end - start, 1);
588 } else {
589 lexer_error(&lexer, "excluded addresses not in subnet");
590 }
591 }
592 if (lexer.error) {
593 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
594 VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
595 UUID_ARGS(&od->key), lexer.error);
596 }
597 lexer_destroy(&lexer);
598 }
599
600 static void
601 ovn_datapath_update_external_ids(struct ovn_datapath *od)
602 {
603 /* Get the logical-switch or logical-router UUID to set in
604 * external-ids. */
605 char uuid_s[UUID_LEN + 1];
606 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
607 const char *key = od->nbs ? "logical-switch" : "logical-router";
608
609 /* Get names to set in external-ids. */
610 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
611 const char *name2 = (od->nbs
612 ? smap_get(&od->nbs->external_ids,
613 "neutron:network_name")
614 : smap_get(&od->nbr->external_ids,
615 "neutron:router_name"));
616
617 /* Set external-ids. */
618 struct smap ids = SMAP_INITIALIZER(&ids);
619 smap_add(&ids, key, uuid_s);
620 smap_add(&ids, "name", name);
621 if (name2 && name2[0]) {
622 smap_add(&ids, "name2", name2);
623 }
624 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
625 smap_destroy(&ids);
626 }
627
628 static void
629 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
630 struct ovs_list *sb_only, struct ovs_list *nb_only,
631 struct ovs_list *both)
632 {
633 hmap_init(datapaths);
634 ovs_list_init(sb_only);
635 ovs_list_init(nb_only);
636 ovs_list_init(both);
637
638 const struct sbrec_datapath_binding *sb, *sb_next;
639 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
640 struct uuid key;
641 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
642 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
643 ovsdb_idl_txn_add_comment(
644 ctx->ovnsb_txn,
645 "deleting Datapath_Binding "UUID_FMT" that lacks "
646 "external-ids:logical-switch and "
647 "external-ids:logical-router",
648 UUID_ARGS(&sb->header_.uuid));
649 sbrec_datapath_binding_delete(sb);
650 continue;
651 }
652
653 if (ovn_datapath_find(datapaths, &key)) {
654 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
655 VLOG_INFO_RL(
656 &rl, "deleting Datapath_Binding "UUID_FMT" with "
657 "duplicate external-ids:logical-switch/router "UUID_FMT,
658 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
659 sbrec_datapath_binding_delete(sb);
660 continue;
661 }
662
663 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
664 NULL, NULL, sb);
665 ovs_list_push_back(sb_only, &od->list);
666 }
667
668 const struct nbrec_logical_switch *nbs;
669 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
670 struct ovn_datapath *od = ovn_datapath_find(datapaths,
671 &nbs->header_.uuid);
672 if (od) {
673 od->nbs = nbs;
674 ovs_list_remove(&od->list);
675 ovs_list_push_back(both, &od->list);
676 ovn_datapath_update_external_ids(od);
677 } else {
678 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
679 nbs, NULL, NULL);
680 ovs_list_push_back(nb_only, &od->list);
681 }
682
683 init_ipam_info_for_datapath(od);
684 }
685
686 const struct nbrec_logical_router *nbr;
687 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
688 if (!lrouter_is_enabled(nbr)) {
689 continue;
690 }
691
692 struct ovn_datapath *od = ovn_datapath_find(datapaths,
693 &nbr->header_.uuid);
694 if (od) {
695 if (!od->nbs) {
696 od->nbr = nbr;
697 ovs_list_remove(&od->list);
698 ovs_list_push_back(both, &od->list);
699 ovn_datapath_update_external_ids(od);
700 } else {
701 /* Can't happen! */
702 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
703 VLOG_WARN_RL(&rl,
704 "duplicate UUID "UUID_FMT" in OVN_Northbound",
705 UUID_ARGS(&nbr->header_.uuid));
706 continue;
707 }
708 } else {
709 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
710 NULL, nbr, NULL);
711 ovs_list_push_back(nb_only, &od->list);
712 }
713 }
714 }
715
/* Allocates a datapath tunnel key in the range 1...2**24 - 1 that is not in
 * 'dp_tnlids', adds it to 'dp_tnlids', and returns it.  Returns 0 if the
 * range is exhausted.  The search hint persists across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
722
723 /* Updates the southbound Datapath_Binding table so that it contains the
724 * logical switches and routers specified by the northbound database.
725 *
726 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
727 * switch and router. */
728 static void
729 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
730 {
731 struct ovs_list sb_only, nb_only, both;
732
733 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
734
735 if (!ovs_list_is_empty(&nb_only)) {
736 /* First index the in-use datapath tunnel IDs. */
737 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
738 struct ovn_datapath *od;
739 LIST_FOR_EACH (od, list, &both) {
740 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
741 }
742
743 /* Add southbound record for each unmatched northbound record. */
744 LIST_FOR_EACH (od, list, &nb_only) {
745 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
746 if (!tunnel_key) {
747 break;
748 }
749
750 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
751 ovn_datapath_update_external_ids(od);
752 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
753 }
754 destroy_tnlids(&dp_tnlids);
755 }
756
757 /* Delete southbound records without northbound matches. */
758 struct ovn_datapath *od, *next;
759 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
760 ovs_list_remove(&od->list);
761 sbrec_datapath_binding_delete(od->sb);
762 ovn_datapath_destroy(datapaths, od);
763 }
764 }
765 \f
766 struct ovn_port {
767 struct hmap_node key_node; /* Index on 'key'. */
768 char *key; /* nbs->name, nbr->name, sb->logical_port. */
769 char *json_key; /* 'key', quoted for use in JSON. */
770
771 const struct sbrec_port_binding *sb; /* May be NULL. */
772
773 /* Logical switch port data. */
774 const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */
775
776 struct lport_addresses *lsp_addrs; /* Logical switch port addresses. */
777 unsigned int n_lsp_addrs;
778
779 struct lport_addresses *ps_addrs; /* Port security addresses. */
780 unsigned int n_ps_addrs;
781
782 /* Logical router port data. */
783 const struct nbrec_logical_router_port *nbrp; /* May be NULL. */
784
785 struct lport_addresses lrp_networks;
786
787 bool derived; /* Indicates whether this is an additional port
788 * derived from nbsp or nbrp. */
789
790 /* The port's peer:
791 *
792 * - A switch port S of type "router" has a router port R as a peer,
793 * and R in turn has S has its peer.
794 *
795 * - Two connected logical router ports have each other as peer. */
796 struct ovn_port *peer;
797
798 struct ovn_datapath *od;
799
800 struct ovs_list list; /* In list of similar records. */
801 };
802
803 static struct ovn_port *
804 ovn_port_create(struct hmap *ports, const char *key,
805 const struct nbrec_logical_switch_port *nbsp,
806 const struct nbrec_logical_router_port *nbrp,
807 const struct sbrec_port_binding *sb)
808 {
809 struct ovn_port *op = xzalloc(sizeof *op);
810
811 struct ds json_key = DS_EMPTY_INITIALIZER;
812 json_string_escape(key, &json_key);
813 op->json_key = ds_steal_cstr(&json_key);
814
815 op->key = xstrdup(key);
816 op->sb = sb;
817 op->nbsp = nbsp;
818 op->nbrp = nbrp;
819 op->derived = false;
820 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
821 return op;
822 }
823
824 static void
825 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
826 {
827 if (port) {
828 /* Don't remove port->list. It is used within build_ports() as a
829 * private list and once we've exited that function it is not safe to
830 * use it. */
831 hmap_remove(ports, &port->key_node);
832
833 for (int i = 0; i < port->n_lsp_addrs; i++) {
834 destroy_lport_addresses(&port->lsp_addrs[i]);
835 }
836 free(port->lsp_addrs);
837
838 for (int i = 0; i < port->n_ps_addrs; i++) {
839 destroy_lport_addresses(&port->ps_addrs[i]);
840 }
841 free(port->ps_addrs);
842
843 destroy_lport_addresses(&port->lrp_networks);
844 free(port->json_key);
845 free(port->key);
846 free(port);
847 }
848 }
849
850 static struct ovn_port *
851 ovn_port_find(struct hmap *ports, const char *name)
852 {
853 struct ovn_port *op;
854
855 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
856 if (!strcmp(op->key, name)) {
857 return op;
858 }
859 }
860 return NULL;
861 }
862
863 static uint32_t
864 ovn_port_allocate_key(struct ovn_datapath *od)
865 {
866 return allocate_tnlid(&od->port_tnlids, "port",
867 (1u << 15) - 1, &od->port_key_hint);
868 }
869
/* Returns a newly allocated string that consists of "cr-" followed by
 * 'port_name'. */
static char *
chassis_redirect_name(const char *port_name)
{
    char *redirect_name = xasprintf("cr-%s", port_name);
    return redirect_name;
}
875
876 static bool
877 ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
878 {
879 struct macam_node *macam_node;
880 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
881 &macam) {
882 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
883 if (warn) {
884 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
885 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
886 ETH_ADDR_ARGS(macam_node->mac_addr));
887 }
888 return true;
889 }
890 }
891 return false;
892 }
893
894 static void
895 ipam_insert_mac(struct eth_addr *ea, bool check)
896 {
897 if (!ea) {
898 return;
899 }
900
901 uint64_t mac64 = eth_addr_to_uint64(*ea);
902 /* If the new MAC was not assigned by this address management system or
903 * check is true and the new MAC is a duplicate, do not insert it into the
904 * macam hmap. */
905 if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
906 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
907 return;
908 }
909
910 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
911 new_macam_node->mac_addr = *ea;
912 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
913 }
914
915 static void
916 ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
917 {
918 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
919 return;
920 }
921
922 if (ip >= od->ipam_info->start_ipv4 &&
923 ip < (od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s)) {
924 bitmap_set1(od->ipam_info->allocated_ipv4s,
925 ip - od->ipam_info->start_ipv4);
926 }
927 }
928
929 static void
930 ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
931 char *address)
932 {
933 if (!od || !op || !address || !strcmp(address, "unknown")
934 || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
935 return;
936 }
937
938 struct lport_addresses laddrs;
939 if (!extract_lsp_addresses(address, &laddrs)) {
940 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
941 VLOG_WARN_RL(&rl, "Extract addresses failed.");
942 return;
943 }
944 ipam_insert_mac(&laddrs.ea, true);
945
946 /* IP is only added to IPAM if the switch's subnet option
947 * is set, whereas MAC is always added to MACAM. */
948 if (!od->ipam_info || !od->ipam_info->allocated_ipv4s) {
949 destroy_lport_addresses(&laddrs);
950 return;
951 }
952
953 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
954 uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
955 ipam_insert_ip(od, ip);
956 }
957
958 destroy_lport_addresses(&laddrs);
959 }
960
961 static void
962 ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
963 {
964 if (!od || !op) {
965 return;
966 }
967
968 if (op->nbsp) {
969 /* Add all the port's addresses to address data structures. */
970 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
971 ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
972 }
973 if (op->nbsp->dynamic_addresses) {
974 ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
975 }
976 } else if (op->nbrp) {
977 struct lport_addresses lrp_networks;
978 if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
979 static struct vlog_rate_limit rl
980 = VLOG_RATE_LIMIT_INIT(1, 1);
981 VLOG_WARN_RL(&rl, "Extract addresses failed.");
982 return;
983 }
984 ipam_insert_mac(&lrp_networks.ea, true);
985
986 if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
987 || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
988 destroy_lport_addresses(&lrp_networks);
989 return;
990 }
991
992 for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
993 uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
994 ipam_insert_ip(op->peer->od, ip);
995 }
996
997 destroy_lport_addresses(&lrp_networks);
998 }
999 }
1000
1001 static uint64_t
1002 ipam_get_unused_mac(void)
1003 {
1004 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1005 static uint32_t last_mac;
1006
1007 uint64_t mac64;
1008 struct eth_addr mac;
1009 uint32_t mac_addr_suffix, i;
1010 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1011 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1012 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
1013 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1014 eth_addr_from_uint64(mac64, &mac);
1015 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
1016 last_mac = mac_addr_suffix;
1017 break;
1018 }
1019 }
1020
1021 if (i == MAC_ADDR_SPACE) {
1022 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1023 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1024 mac64 = 0;
1025 }
1026
1027 return mac64;
1028 }
1029
1030 static uint32_t
1031 ipam_get_unused_ip(struct ovn_datapath *od)
1032 {
1033 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
1034 return 0;
1035 }
1036
1037 size_t new_ip_index = bitmap_scan(od->ipam_info->allocated_ipv4s, 0, 0,
1038 od->ipam_info->total_ipv4s - 1);
1039 if (new_ip_index == od->ipam_info->total_ipv4s - 1) {
1040 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1041 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
1042 return 0;
1043 }
1044
1045 return od->ipam_info->start_ipv4 + new_ip_index;
1046 }
1047
1048 static bool
1049 ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
1050 const char *addrspec)
1051 {
1052 if (!op->nbsp || !od->ipam_info) {
1053 return false;
1054 }
1055
1056 /* Get or generate MAC address. */
1057 struct eth_addr mac;
1058 bool dynamic_mac;
1059 int n = 0;
1060 if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
1061 ETH_ADDR_SCAN_ARGS(mac), &n)
1062 && addrspec[n] == '\0') {
1063 dynamic_mac = false;
1064 } else {
1065 uint64_t mac64 = ipam_get_unused_mac();
1066 if (!mac64) {
1067 return false;
1068 }
1069 eth_addr_from_uint64(mac64, &mac);
1070 dynamic_mac = true;
1071 }
1072
1073 /* Generate IPv4 address, if desirable. */
1074 bool dynamic_ip4 = od->ipam_info->allocated_ipv4s != NULL;
1075 uint32_t ip4 = dynamic_ip4 ? ipam_get_unused_ip(od) : 0;
1076
1077 /* Generate IPv6 address, if desirable. */
1078 bool dynamic_ip6 = od->ipam_info->ipv6_prefix_set;
1079 struct in6_addr ip6;
1080 if (dynamic_ip6) {
1081 in6_generate_eui64(mac, &od->ipam_info->ipv6_prefix, &ip6);
1082 }
1083
1084 /* If we didn't generate anything, bail out. */
1085 if (!dynamic_ip4 && !dynamic_ip6) {
1086 return false;
1087 }
1088
1089 /* Save the dynamic addresses. */
1090 struct ds new_addr = DS_EMPTY_INITIALIZER;
1091 ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1092 if (dynamic_ip4 && ip4) {
1093 ipam_insert_ip(od, ip4);
1094 ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(htonl(ip4)));
1095 }
1096 if (dynamic_ip6) {
1097 char ip6_s[INET6_ADDRSTRLEN + 1];
1098 ipv6_string_mapped(ip6_s, &ip6);
1099 ds_put_format(&new_addr, " %s", ip6_s);
1100 }
1101 ipam_insert_mac(&mac, !dynamic_mac);
1102 nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
1103 ds_cstr(&new_addr));
1104 ds_destroy(&new_addr);
1105 return true;
1106 }
1107
1108 static void
1109 build_ipam(struct hmap *datapaths, struct hmap *ports)
1110 {
1111 /* IPAM generally stands for IP address management. In non-virtualized
1112 * world, MAC addresses come with the hardware. But, with virtualized
1113 * workloads, they need to be assigned and managed. This function
1114 * does both IP address management (ipam) and MAC address management
1115 * (macam). */
1116
1117 /* If the switch's other_config:subnet is set, allocate new addresses for
1118 * ports that have the "dynamic" keyword in their addresses column. */
1119 struct ovn_datapath *od;
1120 HMAP_FOR_EACH (od, key_node, datapaths) {
1121 if (!od->nbs || !od->ipam_info) {
1122 continue;
1123 }
1124
1125 struct ovn_port *op;
1126 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1127 const struct nbrec_logical_switch_port *nbsp =
1128 od->nbs->ports[i];
1129
1130 if (!nbsp) {
1131 continue;
1132 }
1133
1134 op = ovn_port_find(ports, nbsp->name);
1135 if (!op || (op->nbsp && op->peer)) {
1136 /* Do not allocate addresses for logical switch ports that
1137 * have a peer. */
1138 continue;
1139 }
1140
1141 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1142 if (is_dynamic_lsp_address(nbsp->addresses[j])
1143 && !nbsp->dynamic_addresses) {
1144 if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
1145 || !extract_lsp_addresses(nbsp->dynamic_addresses,
1146 &op->lsp_addrs[op->n_lsp_addrs])) {
1147 static struct vlog_rate_limit rl
1148 = VLOG_RATE_LIMIT_INIT(1, 1);
1149 VLOG_INFO_RL(&rl, "Failed to allocate address.");
1150 } else {
1151 op->n_lsp_addrs++;
1152 }
1153 break;
1154 }
1155 }
1156
1157 if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
1158 nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
1159 NULL);
1160 }
1161 }
1162 }
1163 }
1164 \f
1165 /* Tag allocation for nested containers.
1166 *
1167 * For a logical switch port with 'parent_name' and a request to allocate tags,
1168 * keeps a track of all allocated tags. */
1169 struct tag_alloc_node {
1170 struct hmap_node hmap_node;
1171 char *parent_name;
1172 unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
1173 };
1174
1175 static void
1176 tag_alloc_destroy(struct hmap *tag_alloc_table)
1177 {
1178 struct tag_alloc_node *node;
1179 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1180 bitmap_free(node->allocated_tags);
1181 free(node->parent_name);
1182 free(node);
1183 }
1184 hmap_destroy(tag_alloc_table);
1185 }
1186
1187 static struct tag_alloc_node *
1188 tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1189 {
1190 /* If a node for the 'parent_name' exists, return it. */
1191 struct tag_alloc_node *tag_alloc_node;
1192 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1193 hash_string(parent_name, 0),
1194 tag_alloc_table) {
1195 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1196 return tag_alloc_node;
1197 }
1198 }
1199
1200 /* Create a new node. */
1201 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1202 tag_alloc_node->parent_name = xstrdup(parent_name);
1203 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1204 /* Tag 0 is invalid for nested containers. */
1205 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1206 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1207 hash_string(parent_name, 0));
1208
1209 return tag_alloc_node;
1210 }
1211
1212 static void
1213 tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1214 const struct nbrec_logical_switch_port *nbsp)
1215 {
1216 /* Add the tags of already existing nested containers. If there is no
1217 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1218 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1219 return;
1220 }
1221
1222 struct tag_alloc_node *tag_alloc_node;
1223 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1224 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1225 }
1226
1227 static void
1228 tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1229 const struct nbrec_logical_switch_port *nbsp)
1230 {
1231 if (!nbsp->tag_request) {
1232 return;
1233 }
1234
1235 if (nbsp->parent_name && nbsp->parent_name[0]
1236 && *nbsp->tag_request == 0) {
1237 /* For nested containers that need allocation, do the allocation. */
1238
1239 if (nbsp->tag) {
1240 /* This has already been allocated. */
1241 return;
1242 }
1243
1244 struct tag_alloc_node *tag_alloc_node;
1245 int64_t tag;
1246 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1247 nbsp->parent_name);
1248 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1249 if (tag == MAX_OVN_TAGS) {
1250 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1251 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1252 "parent %s", nbsp->parent_name);
1253 return;
1254 }
1255 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1256 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1257 } else if (*nbsp->tag_request != 0) {
1258 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1259 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1260 }
1261 }
1262 \f
1263
1264 /*
1265 * This function checks if the MAC in "address" parameter (if present) is
1266 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1267 * and updates it.
1268 */
1269 static void
1270 check_and_update_mac_in_dynamic_addresses(
1271 const char *address,
1272 const struct nbrec_logical_switch_port *nbsp)
1273 {
1274 if (!nbsp->dynamic_addresses) {
1275 return;
1276 }
1277 int buf_index = 0;
1278 struct eth_addr ea;
1279 if (!ovs_scan_len(address, &buf_index,
1280 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1281 return;
1282 }
1283
1284 struct eth_addr present_ea;
1285 buf_index = 0;
1286 if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index,
1287 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea))
1288 && !eth_addr_equals(ea, present_ea)) {
1289 /* MAC address has changed. Update it */
1290 char *new_addr = xasprintf(
1291 ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea),
1292 &nbsp->dynamic_addresses[buf_index]);
1293 nbrec_logical_switch_port_set_dynamic_addresses(
1294 nbsp, new_addr);
1295 free(new_addr);
1296 }
1297 }
1298
1299 static void
1300 join_logical_ports(struct northd_context *ctx,
1301 struct hmap *datapaths, struct hmap *ports,
1302 struct hmap *chassis_qdisc_queues,
1303 struct hmap *tag_alloc_table, struct ovs_list *sb_only,
1304 struct ovs_list *nb_only, struct ovs_list *both)
1305 {
1306 hmap_init(ports);
1307 ovs_list_init(sb_only);
1308 ovs_list_init(nb_only);
1309 ovs_list_init(both);
1310
1311 const struct sbrec_port_binding *sb;
1312 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
1313 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
1314 NULL, NULL, sb);
1315 ovs_list_push_back(sb_only, &op->list);
1316 }
1317
1318 struct ovn_datapath *od;
1319 HMAP_FOR_EACH (od, key_node, datapaths) {
1320 if (od->nbs) {
1321 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1322 const struct nbrec_logical_switch_port *nbsp
1323 = od->nbs->ports[i];
1324 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
1325 if (op) {
1326 if (op->nbsp || op->nbrp) {
1327 static struct vlog_rate_limit rl
1328 = VLOG_RATE_LIMIT_INIT(5, 1);
1329 VLOG_WARN_RL(&rl, "duplicate logical port %s",
1330 nbsp->name);
1331 continue;
1332 }
1333 op->nbsp = nbsp;
1334 ovs_list_remove(&op->list);
1335
1336 uint32_t queue_id = smap_get_int(&op->sb->options,
1337 "qdisc_queue_id", 0);
1338 if (queue_id && op->sb->chassis) {
1339 add_chassis_queue(
1340 chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
1341 queue_id);
1342 }
1343
1344 ovs_list_push_back(both, &op->list);
1345
1346 /* This port exists due to a SB binding, but should
1347 * not have been initialized fully. */
1348 ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
1349 } else {
1350 op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
1351 ovs_list_push_back(nb_only, &op->list);
1352 }
1353
1354 op->lsp_addrs
1355 = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
1356 for (size_t j = 0; j < nbsp->n_addresses; j++) {
1357 if (!strcmp(nbsp->addresses[j], "unknown")
1358 || !strcmp(nbsp->addresses[j], "router")) {
1359 continue;
1360 }
1361 if (is_dynamic_lsp_address(nbsp->addresses[j])) {
1362 if (nbsp->dynamic_addresses) {
1363 check_and_update_mac_in_dynamic_addresses(
1364 nbsp->addresses[j], nbsp);
1365 if (!extract_lsp_addresses(nbsp->dynamic_addresses,
1366 &op->lsp_addrs[op->n_lsp_addrs])) {
1367 static struct vlog_rate_limit rl
1368 = VLOG_RATE_LIMIT_INIT(1, 1);
1369 VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
1370 "logical switch port "
1371 "dynamic_addresses. No "
1372 "MAC address found",
1373 op->nbsp->dynamic_addresses);
1374 continue;
1375 }
1376 } else {
1377 continue;
1378 }
1379 } else if (!extract_lsp_addresses(nbsp->addresses[j],
1380 &op->lsp_addrs[op->n_lsp_addrs])) {
1381 static struct vlog_rate_limit rl
1382 = VLOG_RATE_LIMIT_INIT(1, 1);
1383 VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
1384 "switch port addresses. No MAC "
1385 "address found",
1386 op->nbsp->addresses[j]);
1387 continue;
1388 }
1389 op->n_lsp_addrs++;
1390 }
1391
1392 op->ps_addrs
1393 = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
1394 for (size_t j = 0; j < nbsp->n_port_security; j++) {
1395 if (!extract_lsp_addresses(nbsp->port_security[j],
1396 &op->ps_addrs[op->n_ps_addrs])) {
1397 static struct vlog_rate_limit rl
1398 = VLOG_RATE_LIMIT_INIT(1, 1);
1399 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
1400 "security. No MAC address found",
1401 op->nbsp->port_security[j]);
1402 continue;
1403 }
1404 op->n_ps_addrs++;
1405 }
1406
1407 op->od = od;
1408 ipam_add_port_addresses(od, op);
1409 tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
1410 }
1411 } else {
1412 for (size_t i = 0; i < od->nbr->n_ports; i++) {
1413 const struct nbrec_logical_router_port *nbrp
1414 = od->nbr->ports[i];
1415
1416 struct lport_addresses lrp_networks;
1417 if (!extract_lrp_networks(nbrp, &lrp_networks)) {
1418 static struct vlog_rate_limit rl
1419 = VLOG_RATE_LIMIT_INIT(5, 1);
1420 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
1421 continue;
1422 }
1423
1424 if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
1425 continue;
1426 }
1427
1428 struct ovn_port *op = ovn_port_find(ports, nbrp->name);
1429 if (op) {
1430 if (op->nbsp || op->nbrp) {
1431 static struct vlog_rate_limit rl
1432 = VLOG_RATE_LIMIT_INIT(5, 1);
1433 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
1434 nbrp->name);
1435 continue;
1436 }
1437 op->nbrp = nbrp;
1438 ovs_list_remove(&op->list);
1439 ovs_list_push_back(both, &op->list);
1440
1441 /* This port exists but should not have been
1442 * initialized fully. */
1443 ovs_assert(!op->lrp_networks.n_ipv4_addrs
1444 && !op->lrp_networks.n_ipv6_addrs);
1445 } else {
1446 op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
1447 ovs_list_push_back(nb_only, &op->list);
1448 }
1449
1450 op->lrp_networks = lrp_networks;
1451 op->od = od;
1452 ipam_add_port_addresses(op->od, op);
1453
1454 const char *redirect_chassis = smap_get(&op->nbrp->options,
1455 "redirect-chassis");
1456 if (redirect_chassis) {
1457 /* Additional "derived" ovn_port crp represents the
1458 * instance of op on the "redirect-chassis". */
1459 const char *gw_chassis = smap_get(&op->od->nbr->options,
1460 "chassis");
1461 if (gw_chassis) {
1462 static struct vlog_rate_limit rl
1463 = VLOG_RATE_LIMIT_INIT(1, 1);
1464 VLOG_WARN_RL(&rl, "Bad configuration: "
1465 "redirect-chassis configured on port %s "
1466 "on L3 gateway router", nbrp->name);
1467 continue;
1468 }
1469 if (od->l3dgw_port || od->l3redirect_port) {
1470 static struct vlog_rate_limit rl
1471 = VLOG_RATE_LIMIT_INIT(1, 1);
1472 VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
1473 "with redirect-chassis on same logical "
1474 "router %s", od->nbr->name);
1475 continue;
1476 }
1477
1478 char *redirect_name = chassis_redirect_name(nbrp->name);
1479 struct ovn_port *crp = ovn_port_find(ports, redirect_name);
1480 if (crp) {
1481 crp->derived = true;
1482 crp->nbrp = nbrp;
1483 ovs_list_remove(&crp->list);
1484 ovs_list_push_back(both, &crp->list);
1485 } else {
1486 crp = ovn_port_create(ports, redirect_name,
1487 NULL, nbrp, NULL);
1488 crp->derived = true;
1489 ovs_list_push_back(nb_only, &crp->list);
1490 }
1491 crp->od = od;
1492 free(redirect_name);
1493
1494 /* Set l3dgw_port and l3redirect_port in od, for later
1495 * use during flow creation. */
1496 od->l3dgw_port = op;
1497 od->l3redirect_port = crp;
1498 }
1499 }
1500 }
1501 }
1502
1503 /* Connect logical router ports, and logical switch ports of type "router",
1504 * to their peers. */
1505 struct ovn_port *op;
1506 HMAP_FOR_EACH (op, key_node, ports) {
1507 if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
1508 const char *peer_name = smap_get(&op->nbsp->options, "router-port");
1509 if (!peer_name) {
1510 continue;
1511 }
1512
1513 struct ovn_port *peer = ovn_port_find(ports, peer_name);
1514 if (!peer || !peer->nbrp) {
1515 continue;
1516 }
1517
1518 peer->peer = op;
1519 op->peer = peer;
1520 op->od->router_ports = xrealloc(
1521 op->od->router_ports,
1522 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
1523 op->od->router_ports[op->od->n_router_ports++] = op;
1524
1525 /* Fill op->lsp_addrs for op->nbsp->addresses[] with
1526 * contents "router", which was skipped in the loop above. */
1527 for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
1528 if (!strcmp(op->nbsp->addresses[j], "router")) {
1529 if (extract_lrp_networks(peer->nbrp,
1530 &op->lsp_addrs[op->n_lsp_addrs])) {
1531 op->n_lsp_addrs++;
1532 }
1533 break;
1534 }
1535 }
1536 } else if (op->nbrp && op->nbrp->peer && !op->derived) {
1537 struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
1538 if (peer) {
1539 if (peer->nbrp) {
1540 op->peer = peer;
1541 } else if (peer->nbsp) {
1542 /* An ovn_port for a switch port of type "router" does have
1543 * a router port as its peer (see the case above for
1544 * "router" ports), but this is set via options:router-port
1545 * in Logical_Switch_Port and does not involve the
1546 * Logical_Router_Port's 'peer' column. */
1547 static struct vlog_rate_limit rl =
1548 VLOG_RATE_LIMIT_INIT(5, 1);
1549 VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
1550 "port %s is a switch port", op->key);
1551 }
1552 }
1553 }
1554 }
1555 }
1556
1557 static void
1558 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1559 uint16_t *port);
1560
1561 static void
1562 get_router_load_balancer_ips(const struct ovn_datapath *od,
1563 struct sset *all_ips)
1564 {
1565 if (!od->nbr) {
1566 return;
1567 }
1568
1569 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1570 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1571 struct smap *vips = &lb->vips;
1572 struct smap_node *node;
1573
1574 SMAP_FOR_EACH (node, vips) {
1575 /* node->key contains IP:port or just IP. */
1576 char *ip_address = NULL;
1577 uint16_t port;
1578
1579 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1580 if (!ip_address) {
1581 continue;
1582 }
1583
1584 if (!sset_contains(all_ips, ip_address)) {
1585 sset_add(all_ips, ip_address);
1586 }
1587
1588 free(ip_address);
1589 }
1590 }
1591 }
1592
1593 /* Returns an array of strings, each consisting of a MAC address followed
1594 * by one or more IP addresses, and if the port is a distributed gateway
1595 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1596 * LPORT_NAME is the name of the L3 redirect port or the name of the
1597 * logical_port specified in a NAT rule. These strings include the
1598 * external IP addresses of all NAT rules defined on that router, and all
1599 * of the IP addresses used in load balancer VIPs defined on that router.
1600 *
1601 * The caller must free each of the n returned strings with free(),
1602 * and must free the returned array when it is no longer needed. */
1603 static char **
1604 get_nat_addresses(const struct ovn_port *op, size_t *n)
1605 {
1606 size_t n_nats = 0;
1607 struct eth_addr mac;
1608 if (!op->nbrp || !op->od || !op->od->nbr
1609 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1610 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
1611 *n = n_nats;
1612 return NULL;
1613 }
1614
1615 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1616 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1617 bool central_ip_address = false;
1618
1619 char **addresses;
1620 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
1621
1622 /* Get NAT IP addresses. */
1623 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
1624 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1625 ovs_be32 ip, mask;
1626
1627 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1628 if (error || mask != OVS_BE32_MAX) {
1629 free(error);
1630 continue;
1631 }
1632
1633 /* Determine whether this NAT rule satisfies the conditions for
1634 * distributed NAT processing. */
1635 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1636 && nat->logical_port && nat->external_mac) {
1637 /* Distributed NAT rule. */
1638 if (eth_addr_from_string(nat->external_mac, &mac)) {
1639 struct ds address = DS_EMPTY_INITIALIZER;
1640 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1641 ds_put_format(&address, " %s", nat->external_ip);
1642 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1643 nat->logical_port);
1644 addresses[n_nats++] = ds_steal_cstr(&address);
1645 }
1646 } else {
1647 /* Centralized NAT rule, either on gateway router or distributed
1648 * router. */
1649 ds_put_format(&c_addresses, " %s", nat->external_ip);
1650 central_ip_address = true;
1651 }
1652 }
1653
1654 /* A set to hold all load-balancer vips. */
1655 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1656 get_router_load_balancer_ips(op->od, &all_ips);
1657
1658 const char *ip_address;
1659 SSET_FOR_EACH (ip_address, &all_ips) {
1660 ds_put_format(&c_addresses, " %s", ip_address);
1661 central_ip_address = true;
1662 }
1663 sset_destroy(&all_ips);
1664
1665 if (central_ip_address) {
1666 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1667 * ports should be restricted to the "redirect-chassis". */
1668 if (op->od->l3redirect_port) {
1669 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1670 op->od->l3redirect_port->json_key);
1671 }
1672
1673 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
1674 }
1675
1676 *n = n_nats;
1677
1678 return addresses;
1679 }
1680
1681 static void
1682 ovn_port_update_sbrec(const struct ovn_port *op,
1683 struct hmap *chassis_qdisc_queues)
1684 {
1685 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
1686 if (op->nbrp) {
1687 /* If the router is for l3 gateway, it resides on a chassis
1688 * and its port type is "l3gateway". */
1689 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
1690 if (op->derived) {
1691 sbrec_port_binding_set_type(op->sb, "chassisredirect");
1692 } else if (chassis) {
1693 sbrec_port_binding_set_type(op->sb, "l3gateway");
1694 } else {
1695 sbrec_port_binding_set_type(op->sb, "patch");
1696 }
1697
1698 struct smap new;
1699 smap_init(&new);
1700 if (op->derived) {
1701 const char *redirect_chassis = smap_get(&op->nbrp->options,
1702 "redirect-chassis");
1703 if (redirect_chassis) {
1704 smap_add(&new, "redirect-chassis", redirect_chassis);
1705 }
1706 smap_add(&new, "distributed-port", op->nbrp->name);
1707 } else {
1708 const char *peer = op->peer ? op->peer->key : "<error>";
1709 smap_add(&new, "peer", peer);
1710 if (chassis) {
1711 smap_add(&new, "l3gateway-chassis", chassis);
1712 }
1713 }
1714 sbrec_port_binding_set_options(op->sb, &new);
1715 smap_destroy(&new);
1716
1717 sbrec_port_binding_set_parent_port(op->sb, NULL);
1718 sbrec_port_binding_set_tag(op->sb, NULL, 0);
1719 sbrec_port_binding_set_mac(op->sb, NULL, 0);
1720
1721 struct smap ids = SMAP_INITIALIZER(&ids);
1722 sbrec_port_binding_set_external_ids(op->sb, &ids);
1723 } else {
1724 if (strcmp(op->nbsp->type, "router")) {
1725 uint32_t queue_id = smap_get_int(
1726 &op->sb->options, "qdisc_queue_id", 0);
1727 bool has_qos = port_has_qos_params(&op->nbsp->options);
1728 struct smap options;
1729
1730 if (op->sb->chassis && has_qos && !queue_id) {
1731 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
1732 op->sb->chassis);
1733 } else if (!has_qos && queue_id) {
1734 free_chassis_queueid(chassis_qdisc_queues,
1735 op->sb->chassis,
1736 queue_id);
1737 queue_id = 0;
1738 }
1739
1740 smap_clone(&options, &op->nbsp->options);
1741 if (queue_id) {
1742 smap_add_format(&options,
1743 "qdisc_queue_id", "%d", queue_id);
1744 }
1745 sbrec_port_binding_set_options(op->sb, &options);
1746 smap_destroy(&options);
1747 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
1748 } else {
1749 const char *chassis = NULL;
1750 if (op->peer && op->peer->od && op->peer->od->nbr) {
1751 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
1752 }
1753
1754 /* A switch port connected to a gateway router is also of
1755 * type "l3gateway". */
1756 if (chassis) {
1757 sbrec_port_binding_set_type(op->sb, "l3gateway");
1758 } else {
1759 sbrec_port_binding_set_type(op->sb, "patch");
1760 }
1761
1762 const char *router_port = smap_get_def(&op->nbsp->options,
1763 "router-port", "<error>");
1764 struct smap new;
1765 smap_init(&new);
1766 smap_add(&new, "peer", router_port);
1767 if (chassis) {
1768 smap_add(&new, "l3gateway-chassis", chassis);
1769 }
1770 sbrec_port_binding_set_options(op->sb, &new);
1771 smap_destroy(&new);
1772
1773 const char *nat_addresses = smap_get(&op->nbsp->options,
1774 "nat-addresses");
1775 if (nat_addresses && !strcmp(nat_addresses, "router")) {
1776 if (op->peer && op->peer->od
1777 && (chassis || op->peer->od->l3redirect_port)) {
1778 size_t n_nats;
1779 char **nats = get_nat_addresses(op->peer, &n_nats);
1780 if (n_nats) {
1781 sbrec_port_binding_set_nat_addresses(op->sb,
1782 (const char **) nats, n_nats);
1783 for (size_t i = 0; i < n_nats; i++) {
1784 free(nats[i]);
1785 }
1786 free(nats);
1787 } else {
1788 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
1789 }
1790 } else {
1791 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
1792 }
1793 /* Only accept manual specification of ethernet address
1794 * followed by IPv4 addresses on type "l3gateway" ports. */
1795 } else if (nat_addresses && chassis) {
1796 struct lport_addresses laddrs;
1797 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
1798 static struct vlog_rate_limit rl =
1799 VLOG_RATE_LIMIT_INIT(1, 1);
1800 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
1801 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
1802 } else {
1803 sbrec_port_binding_set_nat_addresses(op->sb,
1804 &nat_addresses, 1);
1805 destroy_lport_addresses(&laddrs);
1806 }
1807 } else {
1808 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
1809 }
1810 }
1811 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
1812 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
1813 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
1814 op->nbsp->n_addresses);
1815
1816 struct smap ids = SMAP_INITIALIZER(&ids);
1817 smap_clone(&ids, &op->nbsp->external_ids);
1818 const char *name = smap_get(&ids, "neutron:port_name");
1819 if (name && name[0]) {
1820 smap_add(&ids, "name", name);
1821 }
1822 sbrec_port_binding_set_external_ids(op->sb, &ids);
1823 smap_destroy(&ids);
1824 }
1825 }
1826
1827 /* Remove mac_binding entries that refer to logical_ports which are
1828 * deleted. */
1829 static void
1830 cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
1831 {
1832 const struct sbrec_mac_binding *b, *n;
1833 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
1834 if (!ovn_port_find(ports, b->logical_port)) {
1835 sbrec_mac_binding_delete(b);
1836 }
1837 }
1838 }
1839
1840 /* Updates the southbound Port_Binding table so that it contains the logical
1841 * switch ports specified by the northbound database.
1842 *
1843 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
1844 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
1845 * datapaths. */
1846 static void
1847 build_ports(struct northd_context *ctx, struct hmap *datapaths,
1848 struct hmap *ports)
1849 {
1850 struct ovs_list sb_only, nb_only, both;
1851 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
1852 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
1853
1854 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
1855 &tag_alloc_table, &sb_only, &nb_only, &both);
1856
1857 struct ovn_port *op, *next;
1858 /* For logical ports that are in both databases, update the southbound
1859 * record based on northbound data. Also index the in-use tunnel_keys.
1860 * For logical ports that are in NB database, do any tag allocation
1861 * needed. */
1862 LIST_FOR_EACH_SAFE (op, next, list, &both) {
1863 if (op->nbsp) {
1864 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
1865 }
1866 ovn_port_update_sbrec(op, &chassis_qdisc_queues);
1867
1868 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
1869 if (op->sb->tunnel_key > op->od->port_key_hint) {
1870 op->od->port_key_hint = op->sb->tunnel_key;
1871 }
1872 }
1873
1874 /* Add southbound record for each unmatched northbound record. */
1875 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
1876 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
1877 if (!tunnel_key) {
1878 continue;
1879 }
1880
1881 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
1882 ovn_port_update_sbrec(op, &chassis_qdisc_queues);
1883
1884 sbrec_port_binding_set_logical_port(op->sb, op->key);
1885 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
1886 }
1887
1888 bool remove_mac_bindings = false;
1889 if (!ovs_list_is_empty(&sb_only)) {
1890 remove_mac_bindings = true;
1891 }
1892
1893 /* Delete southbound records without northbound matches. */
1894 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
1895 ovs_list_remove(&op->list);
1896 sbrec_port_binding_delete(op->sb);
1897 ovn_port_destroy(ports, op);
1898 }
1899 if (remove_mac_bindings) {
1900 cleanup_mac_bindings(ctx, ports);
1901 }
1902
1903 tag_alloc_destroy(&tag_alloc_table);
1904 destroy_chassis_queues(&chassis_qdisc_queues);
1905 }
1906 \f
/* Range of keys usable for multicast groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A multicast group, identified by its name together with its key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Predefined "flood" group. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Predefined "unknown" group. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Returns true if 'a' and 'b' have the same name and the same key. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return strcmp(a->name, b->name) == 0;
}
1927
1928 /* Multicast group entry. */
1929 struct ovn_multicast {
1930 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
1931 struct ovn_datapath *datapath;
1932 const struct multicast_group *group;
1933
1934 struct ovn_port **ports;
1935 size_t n_ports, allocated_ports;
1936 };
1937
1938 static uint32_t
1939 ovn_multicast_hash(const struct ovn_datapath *datapath,
1940 const struct multicast_group *group)
1941 {
1942 return hash_pointer(datapath, group->key);
1943 }
1944
1945 static struct ovn_multicast *
1946 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
1947 const struct multicast_group *group)
1948 {
1949 struct ovn_multicast *mc;
1950
1951 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
1952 ovn_multicast_hash(datapath, group), mcgroups) {
1953 if (mc->datapath == datapath
1954 && multicast_group_equal(mc->group, group)) {
1955 return mc;
1956 }
1957 }
1958 return NULL;
1959 }
1960
1961 static void
1962 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
1963 struct ovn_port *port)
1964 {
1965 struct ovn_datapath *od = port->od;
1966 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
1967 if (!mc) {
1968 mc = xmalloc(sizeof *mc);
1969 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
1970 mc->datapath = od;
1971 mc->group = group;
1972 mc->n_ports = 0;
1973 mc->allocated_ports = 4;
1974 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
1975 }
1976 if (mc->n_ports >= mc->allocated_ports) {
1977 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
1978 sizeof *mc->ports);
1979 }
1980 mc->ports[mc->n_ports++] = port;
1981 }
1982
1983 static void
1984 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
1985 {
1986 if (mc) {
1987 hmap_remove(mcgroups, &mc->hmap_node);
1988 free(mc->ports);
1989 free(mc);
1990 }
1991 }
1992
1993 static void
1994 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
1995 const struct sbrec_multicast_group *sb)
1996 {
1997 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
1998 for (size_t i = 0; i < mc->n_ports; i++) {
1999 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2000 }
2001 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2002 free(ports);
2003 }
2004 \f
2005 /* Logical flow generation.
2006 *
2007 * This code generates the Logical_Flow table in the southbound database, as a
2008 * function of most of the northbound database.
2009 */
2010
2011 struct ovn_lflow {
2012 struct hmap_node hmap_node;
2013
2014 struct ovn_datapath *od;
2015 enum ovn_stage stage;
2016 uint16_t priority;
2017 char *match;
2018 char *actions;
2019 char *stage_hint;
2020 const char *where;
2021 };
2022
2023 static size_t
2024 ovn_lflow_hash(const struct ovn_lflow *lflow)
2025 {
2026 size_t hash = uuid_hash(&lflow->od->key);
2027 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
2028 hash = hash_string(lflow->match, hash);
2029 return hash_string(lflow->actions, hash);
2030 }
2031
2032 static bool
2033 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2034 {
2035 return (a->od == b->od
2036 && a->stage == b->stage
2037 && a->priority == b->priority
2038 && !strcmp(a->match, b->match)
2039 && !strcmp(a->actions, b->actions));
2040 }
2041
2042 static void
2043 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
2044 enum ovn_stage stage, uint16_t priority,
2045 char *match, char *actions, char *stage_hint,
2046 const char *where)
2047 {
2048 lflow->od = od;
2049 lflow->stage = stage;
2050 lflow->priority = priority;
2051 lflow->match = match;
2052 lflow->actions = actions;
2053 lflow->stage_hint = stage_hint;
2054 lflow->where = where;
2055 }
2056
2057 /* Adds a row with the specified contents to the Logical_Flow table. */
2058 static void
2059 ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2060 enum ovn_stage stage, uint16_t priority,
2061 const char *match, const char *actions,
2062 const char *stage_hint, const char *where)
2063 {
2064 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2065
2066 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
2067 ovn_lflow_init(lflow, od, stage, priority,
2068 xstrdup(match), xstrdup(actions),
2069 nullable_xstrdup(stage_hint), where);
2070 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2071 }
2072
/* Convenience wrappers around ovn_lflow_add_at() that pass
 * OVS_SOURCE_LOCATOR as the 'where' argument.
 *
 * ovn_lflow_add_with_hint() forwards STAGE_HINT unchanged. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT)                    \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS,    \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

/* Same as ovn_lflow_add_with_hint(), with a NULL stage hint. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH,    \
                            ACTIONS, NULL)
2082
2083 static struct ovn_lflow *
2084 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
2085 enum ovn_stage stage, uint16_t priority,
2086 const char *match, const char *actions)
2087 {
2088 struct ovn_lflow target;
2089 ovn_lflow_init(&target, od, stage, priority,
2090 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2091 NULL, NULL);
2092
2093 struct ovn_lflow *lflow;
2094 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
2095 lflows) {
2096 if (ovn_lflow_equal(lflow, &target)) {
2097 return lflow;
2098 }
2099 }
2100 return NULL;
2101 }
2102
2103 static void
2104 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2105 {
2106 if (lflow) {
2107 hmap_remove(lflows, &lflow->hmap_node);
2108 free(lflow->match);
2109 free(lflow->actions);
2110 free(lflow->stage_hint);
2111 free(lflow);
2112 }
2113 }
2114
2115 /* Appends port security constraints on L2 address field 'eth_addr_field'
2116 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2117 * elements, is the collection of port_security constraints from an
2118 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2119 static void
2120 build_port_security_l2(const char *eth_addr_field,
2121 struct lport_addresses *ps_addrs,
2122 unsigned int n_ps_addrs,
2123 struct ds *match)
2124 {
2125 if (!n_ps_addrs) {
2126 return;
2127 }
2128
2129 ds_put_format(match, " && %s == {", eth_addr_field);
2130
2131 for (size_t i = 0; i < n_ps_addrs; i++) {
2132 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
2133 }
2134 ds_chomp(match, ' ');
2135 ds_put_cstr(match, "}");
2136 }
2137
2138 static void
2139 build_port_security_ipv6_nd_flow(
2140 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
2141 int n_ipv6_addrs)
2142 {
2143 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
2144 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
2145 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
2146 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
2147 ETH_ADDR_ARGS(ea));
2148 if (!n_ipv6_addrs) {
2149 ds_put_cstr(match, "))");
2150 return;
2151 }
2152
2153 char ip6_str[INET6_ADDRSTRLEN + 1];
2154 struct in6_addr lla;
2155 in6_generate_lla(ea, &lla);
2156 memset(ip6_str, 0, sizeof(ip6_str));
2157 ipv6_string_mapped(ip6_str, &lla);
2158 ds_put_format(match, " && (nd.target == %s", ip6_str);
2159
2160 for(int i = 0; i < n_ipv6_addrs; i++) {
2161 memset(ip6_str, 0, sizeof(ip6_str));
2162 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2163 ds_put_format(match, " || nd.target == %s", ip6_str);
2164 }
2165
2166 ds_put_format(match, ")))");
2167 }
2168
2169 static void
2170 build_port_security_ipv6_flow(
2171 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
2172 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
2173 {
2174 char ip6_str[INET6_ADDRSTRLEN + 1];
2175
2176 ds_put_format(match, " && %s == {",
2177 pipeline == P_IN ? "ip6.src" : "ip6.dst");
2178
2179 /* Allow link-local address. */
2180 struct in6_addr lla;
2181 in6_generate_lla(ea, &lla);
2182 ipv6_string_mapped(ip6_str, &lla);
2183 ds_put_format(match, "%s, ", ip6_str);
2184
2185 /* Allow ip6.dst=ff00::/8 for multicast packets */
2186 if (pipeline == P_OUT) {
2187 ds_put_cstr(match, "ff00::/8, ");
2188 }
2189 for(int i = 0; i < n_ipv6_addrs; i++) {
2190 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2191 ds_put_format(match, "%s, ", ip6_str);
2192 }
2193 /* Replace ", " by "}". */
2194 ds_chomp(match, ' ');
2195 ds_chomp(match, ',');
2196 ds_put_cstr(match, "}");
2197 }
2198
2199 /**
2200 * Build port security constraints on ARP and IPv6 ND fields
2201 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2202 *
2203 * For each port security of the logical port, following
2204 * logical flows are added
2205 * - If the port security has no IP (both IPv4 and IPv6) or
2206 * if it has IPv4 address(es)
2207 * - Priority 90 flow to allow ARP packets for known MAC addresses
2208 * in the eth.src and arp.spa fields. If the port security
2209 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2210 *
2211 * - If the port security has no IP (both IPv4 and IPv6) or
2212 * if it has IPv6 address(es)
2213 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2214 * in the eth.src and nd.sll/nd.tll fields. If the port security
2215 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2216 * for IPv6 Neighbor Advertisement packet.
2217 *
2218 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2219 */
2220 static void
2221 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
2222 {
2223 struct ds match = DS_EMPTY_INITIALIZER;
2224
2225 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2226 struct lport_addresses *ps = &op->ps_addrs[i];
2227
2228 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
2229
2230 ds_clear(&match);
2231 if (ps->n_ipv4_addrs || no_ip) {
2232 ds_put_format(&match,
2233 "inport == %s && eth.src == %s && arp.sha == %s",
2234 op->json_key, ps->ea_s, ps->ea_s);
2235
2236 if (ps->n_ipv4_addrs) {
2237 ds_put_cstr(&match, " && arp.spa == {");
2238 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
2239 /* When the netmask is applied, if the host portion is
2240 * non-zero, the host can only use the specified
2241 * address in the arp.spa. If zero, the host is allowed
2242 * to use any address in the subnet. */
2243 if (ps->ipv4_addrs[j].plen == 32
2244 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
2245 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
2246 } else {
2247 ds_put_format(&match, "%s/%d",
2248 ps->ipv4_addrs[j].network_s,
2249 ps->ipv4_addrs[j].plen);
2250 }
2251 ds_put_cstr(&match, ", ");
2252 }
2253 ds_chomp(&match, ' ');
2254 ds_chomp(&match, ',');
2255 ds_put_cstr(&match, "}");
2256 }
2257 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2258 ds_cstr(&match), "next;");
2259 }
2260
2261 if (ps->n_ipv6_addrs || no_ip) {
2262 ds_clear(&match);
2263 ds_put_format(&match, "inport == %s && eth.src == %s",
2264 op->json_key, ps->ea_s);
2265 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
2266 ps->n_ipv6_addrs);
2267 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2268 ds_cstr(&match), "next;");
2269 }
2270 }
2271
2272 ds_clear(&match);
2273 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
2274 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
2275 ds_cstr(&match), "drop;");
2276 ds_destroy(&match);
2277 }
2278
2279 /**
2280 * Build port security constraints on IPv4 and IPv6 src and dst fields
2281 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2282 *
2283 * For each port security of the logical port, following
2284 * logical flows are added
2285 * - If the port security has IPv4 addresses,
2286 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2287 *
2288 * - If the port security has IPv6 addresses,
2289 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2290 *
2291 * - If the port security has IPv4 addresses or IPv6 addresses or both
2292 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2293 */
2294 static void
2295 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
2296 struct hmap *lflows)
2297 {
2298 char *port_direction;
2299 enum ovn_stage stage;
2300 if (pipeline == P_IN) {
2301 port_direction = "inport";
2302 stage = S_SWITCH_IN_PORT_SEC_IP;
2303 } else {
2304 port_direction = "outport";
2305 stage = S_SWITCH_OUT_PORT_SEC_IP;
2306 }
2307
2308 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2309 struct lport_addresses *ps = &op->ps_addrs[i];
2310
2311 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
2312 continue;
2313 }
2314
2315 if (ps->n_ipv4_addrs) {
2316 struct ds match = DS_EMPTY_INITIALIZER;
2317 if (pipeline == P_IN) {
2318 /* Permit use of the unspecified address for DHCP discovery */
2319 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
2320 ds_put_format(&dhcp_match, "inport == %s"
2321 " && eth.src == %s"
2322 " && ip4.src == 0.0.0.0"
2323 " && ip4.dst == 255.255.255.255"
2324 " && udp.src == 68 && udp.dst == 67",
2325 op->json_key, ps->ea_s);
2326 ovn_lflow_add(lflows, op->od, stage, 90,
2327 ds_cstr(&dhcp_match), "next;");
2328 ds_destroy(&dhcp_match);
2329 ds_put_format(&match, "inport == %s && eth.src == %s"
2330 " && ip4.src == {", op->json_key,
2331 ps->ea_s);
2332 } else {
2333 ds_put_format(&match, "outport == %s && eth.dst == %s"
2334 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2335 op->json_key, ps->ea_s);
2336 }
2337
2338 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
2339 ovs_be32 mask = ps->ipv4_addrs[j].mask;
2340 /* When the netmask is applied, if the host portion is
2341 * non-zero, the host can only use the specified
2342 * address. If zero, the host is allowed to use any
2343 * address in the subnet.
2344 */
2345 if (ps->ipv4_addrs[j].plen == 32
2346 || ps->ipv4_addrs[j].addr & ~mask) {
2347 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
2348 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
2349 /* Host is also allowed to receive packets to the
2350 * broadcast address in the specified subnet. */
2351 ds_put_format(&match, ", %s",
2352 ps->ipv4_addrs[j].bcast_s);
2353 }
2354 } else {
2355 /* host portion is zero */
2356 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
2357 ps->ipv4_addrs[j].plen);
2358 }
2359 ds_put_cstr(&match, ", ");
2360 }
2361
2362 /* Replace ", " by "}". */
2363 ds_chomp(&match, ' ');
2364 ds_chomp(&match, ',');
2365 ds_put_cstr(&match, "}");
2366 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
2367 ds_destroy(&match);
2368 }
2369
2370 if (ps->n_ipv6_addrs) {
2371 struct ds match = DS_EMPTY_INITIALIZER;
2372 if (pipeline == P_IN) {
2373 /* Permit use of unspecified address for duplicate address
2374 * detection */
2375 struct ds dad_match = DS_EMPTY_INITIALIZER;
2376 ds_put_format(&dad_match, "inport == %s"
2377 " && eth.src == %s"
2378 " && ip6.src == ::"
2379 " && ip6.dst == ff02::/16"
2380 " && icmp6.type == {131, 135, 143}", op->json_key,
2381 ps->ea_s);
2382 ovn_lflow_add(lflows, op->od, stage, 90,
2383 ds_cstr(&dad_match), "next;");
2384 ds_destroy(&dad_match);
2385 }
2386 ds_put_format(&match, "%s == %s && %s == %s",
2387 port_direction, op->json_key,
2388 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
2389 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
2390 ps->ipv6_addrs, ps->n_ipv6_addrs);
2391 ovn_lflow_add(lflows, op->od, stage, 90,
2392 ds_cstr(&match), "next;");
2393 ds_destroy(&match);
2394 }
2395
2396 char *match = xasprintf("%s == %s && %s == %s && ip",
2397 port_direction, op->json_key,
2398 pipeline == P_IN ? "eth.src" : "eth.dst",
2399 ps->ea_s);
2400 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2401 free(match);
2402 }
2403
2404 }
2405
2406 static bool
2407 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
2408 {
2409 return !lsp->enabled || *lsp->enabled;
2410 }
2411
2412 static bool
2413 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
2414 {
2415 return !lsp->up || *lsp->up;
2416 }
2417
2418 static bool
2419 build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
2420 struct ds *options_action, struct ds *response_action,
2421 struct ds *ipv4_addr_match)
2422 {
2423 if (!op->nbsp->dhcpv4_options) {
2424 /* CMS has disabled native DHCPv4 for this lport. */
2425 return false;
2426 }
2427
2428 ovs_be32 host_ip, mask;
2429 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2430 &mask);
2431 if (error || ((offer_ip ^ host_ip) & mask)) {
2432 /* Either
2433 * - cidr defined is invalid or
2434 * - the offer ip of the logical port doesn't belong to the cidr
2435 * defined in the DHCPv4 options.
2436 * */
2437 free(error);
2438 return false;
2439 }
2440
2441 const char *server_ip = smap_get(
2442 &op->nbsp->dhcpv4_options->options, "server_id");
2443 const char *server_mac = smap_get(
2444 &op->nbsp->dhcpv4_options->options, "server_mac");
2445 const char *lease_time = smap_get(
2446 &op->nbsp->dhcpv4_options->options, "lease_time");
2447
2448 if (!(server_ip && server_mac && lease_time)) {
2449 /* "server_id", "server_mac" and "lease_time" should be
2450 * present in the dhcp_options. */
2451 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2452 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2453 op->json_key);
2454 return false;
2455 }
2456
2457 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2458 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2459
2460 /* server_mac is not DHCPv4 option, delete it from the smap. */
2461 smap_remove(&dhcpv4_options, "server_mac");
2462 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2463 smap_add(&dhcpv4_options, "netmask", netmask);
2464 free(netmask);
2465
2466 ds_put_format(options_action,
2467 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2468 IP_FMT", ", IP_ARGS(offer_ip));
2469
2470 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2471 * options on different architectures (big or little endian, SSE4.2) */
2472 const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
2473 for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
2474 const struct smap_node *node = sorted_opts[i];
2475 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2476 }
2477 free(sorted_opts);
2478
2479 ds_chomp(options_action, ' ');
2480 ds_chomp(options_action, ',');
2481 ds_put_cstr(options_action, "); next;");
2482
2483 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2484 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
2485 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2486 "output;",
2487 server_mac, IP_ARGS(offer_ip), server_ip);
2488
2489 ds_put_format(ipv4_addr_match,
2490 "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
2491 IP_ARGS(offer_ip), server_ip);
2492 smap_destroy(&dhcpv4_options);
2493 return true;
2494 }
2495
2496 static bool
2497 build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2498 struct ds *options_action, struct ds *response_action)
2499 {
2500 if (!op->nbsp->dhcpv6_options) {
2501 /* CMS has disabled native DHCPv6 for this lport. */
2502 return false;
2503 }
2504
2505 struct in6_addr host_ip, mask;
2506
2507 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2508 &mask);
2509 if (error) {
2510 free(error);
2511 return false;
2512 }
2513 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2514 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2515 if (!ipv6_mask_is_any(&ip6_mask)) {
2516 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2517 * options.*/
2518 return false;
2519 }
2520
2521 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
2522 /* "server_id" should be the MAC address. */
2523 const char *server_mac = smap_get(options_map, "server_id");
2524 struct eth_addr ea;
2525 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2526 /* "server_id" should be present in the dhcpv6_options. */
2527 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2528 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2529 " for lport %s", op->json_key);
2530 return false;
2531 }
2532
2533 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2534 struct in6_addr lla;
2535 in6_generate_lla(ea, &lla);
2536
2537 char server_ip[INET6_ADDRSTRLEN + 1];
2538 ipv6_string_mapped(server_ip, &lla);
2539
2540 char ia_addr[INET6_ADDRSTRLEN + 1];
2541 ipv6_string_mapped(ia_addr, offer_ip);
2542
2543 ds_put_format(options_action,
2544 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2545
2546 /* Check whether the dhcpv6 options should be configured as stateful.
2547 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2548 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
2549 char ia_addr[INET6_ADDRSTRLEN + 1];
2550 ipv6_string_mapped(ia_addr, offer_ip);
2551
2552 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2553 }
2554
2555 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2556 * options on different architectures (big or little endian, SSE4.2) */
2557 const struct smap_node **sorted_opts = smap_sort(options_map);
2558 for (size_t i = 0; i < smap_count(options_map); i++) {
2559 const struct smap_node *node = sorted_opts[i];
2560 if (strcmp(node->key, "dhcpv6_stateless")) {
2561 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2562 }
2563 }
2564 free(sorted_opts);
2565
2566 ds_chomp(options_action, ' ');
2567 ds_chomp(options_action, ',');
2568 ds_put_cstr(options_action, "); next;");
2569
2570 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2571 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2572 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2573 "output;",
2574 server_mac, server_ip);
2575
2576 return true;
2577 }
2578
2579 static bool
2580 has_stateful_acl(struct ovn_datapath *od)
2581 {
2582 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2583 struct nbrec_acl *acl = od->nbs->acls[i];
2584 if (!strcmp(acl->action, "allow-related")) {
2585 return true;
2586 }
2587 }
2588
2589 return false;
2590 }
2591
2592 static void
2593 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
2594 {
2595 bool has_stateful = has_stateful_acl(od);
2596
2597 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2598 * allowed by default. */
2599 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
2600 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
2601
2602 /* If there are any stateful ACL rules in this datapath, we must
2603 * send all IP packets through the conntrack action, which handles
2604 * defragmentation, in order to match L4 headers. */
2605 if (has_stateful) {
2606 for (size_t i = 0; i < od->n_router_ports; i++) {
2607 struct ovn_port *op = od->router_ports[i];
2608 /* Can't use ct() for router ports. Consider the
2609 * following configuration: lp1(10.0.0.2) on
2610 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2611 * ping from lp1 to lp2, First, the response will go
2612 * through ct() with a zone for lp2 in the ls2 ingress
2613 * pipeline on hostB. That ct zone knows about this
2614 * connection. Next, it goes through ct() with the zone
2615 * for the router port in the egress pipeline of ls2 on
2616 * hostB. This zone does not know about the connection,
2617 * as the icmp request went through the logical router
2618 * on hostA, not hostB. This would only work with
2619 * distributed conntrack state across all chassis. */
2620 struct ds match_in = DS_EMPTY_INITIALIZER;
2621 struct ds match_out = DS_EMPTY_INITIALIZER;
2622
2623 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
2624 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
2625 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2626 ds_cstr(&match_in), "next;");
2627 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2628 ds_cstr(&match_out), "next;");
2629
2630 ds_destroy(&match_in);
2631 ds_destroy(&match_out);
2632 }
2633 /* Ingress and Egress Pre-ACL Table (Priority 110).
2634 *
2635 * Not to do conntrack on ND packets. */
2636 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
2637 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
2638
2639 /* Ingress and Egress Pre-ACL Table (Priority 100).
2640 *
2641 * Regardless of whether the ACL is "from-lport" or "to-lport",
2642 * we need rules in both the ingress and egress table, because
2643 * the return traffic needs to be followed.
2644 *
2645 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2646 * it to conntrack for tracking and defragmentation. */
2647 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
2648 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2649 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
2650 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2651 }
2652 }
2653
2654 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2655 * 'ip_address'. The caller must free() the memory allocated for
2656 * 'ip_address'. */
2657 static void
2658 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
2659 uint16_t *port)
2660 {
2661 char *ip_str, *start, *next;
2662 *ip_address = NULL;
2663 *port = 0;
2664
2665 next = start = xstrdup(key);
2666 ip_str = strsep(&next, ":");
2667 if (!ip_str || !ip_str[0]) {
2668 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2669 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2670 free(start);
2671 return;
2672 }
2673
2674 ovs_be32 ip, mask;
2675 char *error = ip_parse_masked(ip_str, &ip, &mask);
2676 if (error || mask != OVS_BE32_MAX) {
2677 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2678 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2679 free(start);
2680 free(error);
2681 return;
2682 }
2683
2684 int l4_port = 0;
2685 if (next && next[0]) {
2686 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
2687 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2688 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
2689 free(start);
2690 return;
2691 }
2692 }
2693
2694 *port = l4_port;
2695 *ip_address = strdup(ip_str);
2696 free(start);
2697 }
2698
2699 /*
2700 * Returns true if logical switch is configured with DNS records, false
2701 * otherwise.
2702 */
2703 static bool
2704 ls_has_dns_records(const struct nbrec_logical_switch *nbs)
2705 {
2706 for (size_t i = 0; i < nbs->n_dns_records; i++) {
2707 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
2708 return true;
2709 }
2710 }
2711
2712 return false;
2713 }
2714
2715 static void
2716 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2717 {
2718 /* Allow all packets to go to next tables by default. */
2719 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2720 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2721
2722 struct sset all_ips = SSET_INITIALIZER(&all_ips);
2723 bool vip_configured = false;
2724 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2725 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
2726 struct smap *vips = &lb->vips;
2727 struct smap_node *node;
2728
2729 SMAP_FOR_EACH (node, vips) {
2730 vip_configured = true;
2731
2732 /* node->key contains IP:port or just IP. */
2733 char *ip_address = NULL;
2734 uint16_t port;
2735 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2736 if (!ip_address) {
2737 continue;
2738 }
2739
2740 if (!sset_contains(&all_ips, ip_address)) {
2741 sset_add(&all_ips, ip_address);
2742 }
2743
2744 free(ip_address);
2745
2746 /* Ignore L4 port information in the key because fragmented packets
2747 * may not have L4 information. The pre-stateful table will send
2748 * the packet through ct() action to de-fragment. In stateful
2749 * table, we will eventually look at L4 information. */
2750 }
2751 }
2752
2753 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2754 * packet to conntrack for defragmentation. */
2755 const char *ip_address;
2756 SSET_FOR_EACH(ip_address, &all_ips) {
2757 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
2758 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
2759 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2760 free(match);
2761 }
2762
2763 sset_destroy(&all_ips);
2764
2765 if (vip_configured) {
2766 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
2767 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2768 }
2769 }
2770
2771 static void
2772 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
2773 {
2774 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
2775 * allowed by default. */
2776 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
2777 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
2778
2779 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
2780 * sent to conntrack for tracking and defragmentation. */
2781 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
2782 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2783 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
2784 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2785 }
2786
2787 static void
2788 build_acls(struct ovn_datapath *od, struct hmap *lflows)
2789 {
2790 bool has_stateful = has_stateful_acl(od);
2791
2792 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
2793 * default. A related rule at priority 1 is added below if there
2794 * are any stateful ACLs in this datapath. */
2795 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
2796 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
2797
2798 if (has_stateful) {
2799 /* Ingress and Egress ACL Table (Priority 1).
2800 *
2801 * By default, traffic is allowed. This is partially handled by
2802 * the Priority 0 ACL flows added earlier, but we also need to
2803 * commit IP flows. This is because, while the initiater's
2804 * direction may not have any stateful rules, the server's may
2805 * and then its return traffic would not have an associated
2806 * conntrack entry and would return "+invalid".
2807 *
2808 * We use "ct_commit" for a connection that is not already known
2809 * by the connection tracker. Once a connection is committed,
2810 * subsequent packets will hit the flow at priority 0 that just
2811 * uses "next;"
2812 *
2813 * We also check for established connections that have ct_label.blocked
2814 * set on them. That's a connection that was disallowed, but is
2815 * now allowed by policy again since it hit this default-allow flow.
2816 * We need to set ct_label.blocked=0 to let the connection continue,
2817 * which will be done by ct_commit() in the "stateful" stage.
2818 * Subsequent packets will hit the flow at priority 0 that just
2819 * uses "next;". */
2820 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
2821 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
2822 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2823 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
2824 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
2825 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2826
2827 /* Ingress and Egress ACL Table (Priority 65535).
2828 *
2829 * Always drop traffic that's in an invalid state. Also drop
2830 * reply direction packets for connections that have been marked
2831 * for deletion (bit 0 of ct_label is set).
2832 *
2833 * This is enforced at a higher priority than ACLs can be defined. */
2834 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
2835 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
2836 "drop;");
2837 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
2838 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
2839 "drop;");
2840
2841 /* Ingress and Egress ACL Table (Priority 65535).
2842 *
2843 * Allow reply traffic that is part of an established
2844 * conntrack entry that has not been marked for deletion
2845 * (bit 0 of ct_label). We only match traffic in the
2846 * reply direction because we want traffic in the request
2847 * direction to hit the currently defined policy from ACLs.
2848 *
2849 * This is enforced at a higher priority than ACLs can be defined. */
2850 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
2851 "ct.est && !ct.rel && !ct.new && !ct.inv "
2852 "&& ct.rpl && ct_label.blocked == 0",
2853 "next;");
2854 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
2855 "ct.est && !ct.rel && !ct.new && !ct.inv "
2856 "&& ct.rpl && ct_label.blocked == 0",
2857 "next;");
2858
2859 /* Ingress and Egress ACL Table (Priority 65535).
2860 *
2861 * Allow traffic that is related to an existing conntrack entry that
2862 * has not been marked for deletion (bit 0 of ct_label).
2863 *
2864 * This is enforced at a higher priority than ACLs can be defined.
2865 *
2866 * NOTE: This does not support related data sessions (eg,
2867 * a dynamically negotiated FTP data channel), but will allow
2868 * related traffic such as an ICMP Port Unreachable through
2869 * that's generated from a non-listening UDP port. */
2870 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
2871 "!ct.est && ct.rel && !ct.new && !ct.inv "
2872 "&& ct_label.blocked == 0",
2873 "next;");
2874 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
2875 "!ct.est && ct.rel && !ct.new && !ct.inv "
2876 "&& ct_label.blocked == 0",
2877 "next;");
2878
2879 /* Ingress and Egress ACL Table (Priority 65535).
2880 *
2881 * Not to do conntrack on ND packets. */
2882 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
2883 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
2884 }
2885
2886 /* Ingress or Egress ACL Table (Various priorities). */
2887 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2888 struct nbrec_acl *acl = od->nbs->acls[i];
2889 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
2890 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
2891
2892 char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
2893 if (!strcmp(acl->action, "allow")
2894 || !strcmp(acl->action, "allow-related")) {
2895 /* If there are any stateful flows, we must even commit "allow"
2896 * actions. This is because, while the initiator's
2897 * direction may not have any stateful rules, the server's
2898 * may and then its return traffic would not have an
2899 * associated conntrack entry and would return "+invalid". */
2900 if (!has_stateful) {
2901 ovn_lflow_add_with_hint(lflows, od, stage,
2902 acl->priority + OVN_ACL_PRI_OFFSET,
2903 acl->match, "next;", stage_hint);
2904 } else {
2905 struct ds match = DS_EMPTY_INITIALIZER;
2906
2907 /* Commit the connection tracking entry if it's a new
2908 * connection that matches this ACL. After this commit,
2909 * the reply traffic is allowed by a flow we create at
2910 * priority 65535, defined earlier.
2911 *
2912 * It's also possible that a known connection was marked for
2913 * deletion after a policy was deleted, but the policy was
2914 * re-added while that connection is still known. We catch
2915 * that case here and un-set ct_label.blocked (which will be done
2916 * by ct_commit in the "stateful" stage) to indicate that the
2917 * connection should be allowed to resume.
2918 */
2919 ds_put_format(&match, "((ct.new && !ct.est)"
2920 " || (!ct.new && ct.est && !ct.rpl "
2921 "&& ct_label.blocked == 1)) "
2922 "&& (%s)", acl->match);
2923 ovn_lflow_add_with_hint(lflows, od, stage,
2924 acl->priority + OVN_ACL_PRI_OFFSET,
2925 ds_cstr(&match),
2926 REGBIT_CONNTRACK_COMMIT" = 1; next;",
2927 stage_hint);
2928
2929 /* Match on traffic in the request direction for an established
2930 * connection tracking entry that has not been marked for
2931 * deletion. There is no need to commit here, so we can just
2932 * proceed to the next table. We use this to ensure that this
2933 * connection is still allowed by the currently defined
2934 * policy. */
2935 ds_clear(&match);
2936 ds_put_format(&match,
2937 "!ct.new && ct.est && !ct.rpl"
2938 " && ct_label.blocked == 0 && (%s)",
2939 acl->match);
2940 ovn_lflow_add_with_hint(lflows, od, stage,
2941 acl->priority + OVN_ACL_PRI_OFFSET,
2942 ds_cstr(&match), "next;",
2943 stage_hint);
2944
2945 ds_destroy(&match);
2946 }
2947 } else if (!strcmp(acl->action, "drop")
2948 || !strcmp(acl->action, "reject")) {
2949 struct ds match = DS_EMPTY_INITIALIZER;
2950
2951 /* XXX Need to support "reject", treat it as "drop;" for now. */
2952 if (!strcmp(acl->action, "reject")) {
2953 VLOG_INFO("reject is not a supported action");
2954 }
2955
2956 /* The implementation of "drop" differs if stateful ACLs are in
2957 * use for this datapath. In that case, the actions differ
2958 * depending on whether the connection was previously committed
2959 * to the connection tracker with ct_commit. */
2960 if (has_stateful) {
2961 /* If the packet is not part of an established connection, then
2962 * we can simply drop it. */
2963 ds_put_format(&match,
2964 "(!ct.est || (ct.est && ct_label.blocked == 1)) "
2965 "&& (%s)",
2966 acl->match);
2967 ovn_lflow_add_with_hint(lflows, od, stage,
2968 acl->priority + OVN_ACL_PRI_OFFSET,
2969 ds_cstr(&match), "drop;",
2970 stage_hint);
2971
2972 /* For an existing connection without ct_label set, we've
2973 * encountered a policy change. ACLs previously allowed
2974 * this connection and we committed the connection tracking
2975 * entry. Current policy says that we should drop this
2976 * connection. First, we set bit 0 of ct_label to indicate
2977 * that this connection is set for deletion. By not
2978 * specifying "next;", we implicitly drop the packet after
2979 * updating conntrack state. We would normally defer
2980 * ct_commit() to the "stateful" stage, but since we're
2981 * dropping the packet, we go ahead and do it here. */
2982 ds_clear(&match);
2983 ds_put_format(&match,
2984 "ct.est && ct_label.blocked == 0 && (%s)",
2985 acl->match);
2986 ovn_lflow_add_with_hint(lflows, od, stage,
2987 acl->priority + OVN_ACL_PRI_OFFSET,
2988 ds_cstr(&match),
2989 "ct_commit(ct_label=1/1);",
2990 stage_hint);
2991
2992 ds_destroy(&match);
2993 } else {
2994 /* There are no stateful ACLs in use on this datapath,
2995 * so a "drop" ACL is simply the "drop" logical flow action
2996 * in all cases. */
2997 ovn_lflow_add_with_hint(lflows, od, stage,
2998 acl->priority + OVN_ACL_PRI_OFFSET,
2999 acl->match, "drop;", stage_hint);
3000 ds_destroy(&match);
3001 }
3002 }
3003 free(stage_hint);
3004 }
3005
3006 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
3007 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3008 * */
3009 for (size_t i = 0; i < od->nbs->n_ports; i++) {
3010 if (od->nbs->ports[i]->dhcpv4_options) {
3011 const char *server_id = smap_get(
3012 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
3013 const char *server_mac = smap_get(
3014 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
3015 const char *lease_time = smap_get(
3016 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
3017 if (server_id && server_mac && lease_time) {
3018 struct ds match = DS_EMPTY_INITIALIZER;
3019 const char *actions =
3020 has_stateful ? "ct_commit; next;" : "next;";
3021 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3022 "&& ip4.src == %s && udp && udp.src == 67 "
3023 "&& udp.dst == 68", od->nbs->ports[i]->name,
3024 server_mac, server_id);
3025 ovn_lflow_add(
3026 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3027 actions);
3028 ds_destroy(&match);
3029 }
3030 }
3031
3032 if (od->nbs->ports[i]->dhcpv6_options) {
3033 const char *server_mac = smap_get(
3034 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
3035 struct eth_addr ea;
3036 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
3037 /* Get the link local IP of the DHCPv6 server from the
3038 * server MAC. */
3039 struct in6_addr lla;
3040 in6_generate_lla(ea, &lla);
3041
3042 char server_ip[INET6_ADDRSTRLEN + 1];
3043 ipv6_string_mapped(server_ip, &lla);
3044
3045 struct ds match = DS_EMPTY_INITIALIZER;
3046 const char *actions = has_stateful ? "ct_commit; next;" :
3047 "next;";
3048 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3049 "&& ip6.src == %s && udp && udp.src == 547 "
3050 "&& udp.dst == 546", od->nbs->ports[i]->name,
3051 server_mac, server_ip);
3052 ovn_lflow_add(
3053 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3054 actions);
3055 ds_destroy(&match);
3056 }
3057 }
3058 }
3059
3060 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3061 * if the CMS has configured DNS records for the datapath.
3062 */
3063 if (ls_has_dns_records(od->nbs)) {
3064 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
3065 ovn_lflow_add(
3066 lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
3067 actions);
3068 }
3069 }
3070
3071 static void
3072 build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3073 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3074 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3075
3076 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3077 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3078 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3079 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3080
3081 if (!strcmp(qos->key_action, "dscp")) {
3082 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3083
3084 ds_put_format(&dscp_action, "ip.dscp = %d; next;",
3085 (uint8_t)qos->value_action);
3086 ovn_lflow_add(lflows, od, stage,
3087 qos->priority,
3088 qos->match, ds_cstr(&dscp_action));
3089 ds_destroy(&dscp_action);
3090 }
3091 }
3092 }
3093
3094 static void
3095 build_lb(struct ovn_datapath *od, struct hmap *lflows)
3096 {
3097 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3098 * default. */
3099 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3100 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3101
3102 if (od->nbs->load_balancer) {
3103 /* Ingress and Egress LB Table (Priority 65535).
3104 *
3105 * Send established traffic through conntrack for just NAT. */
3106 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3107 "ct.est && !ct.rel && !ct.new && !ct.inv",
3108 REGBIT_CONNTRACK_NAT" = 1; next;");
3109 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3110 "ct.est && !ct.rel && !ct.new && !ct.inv",
3111 REGBIT_CONNTRACK_NAT" = 1; next;");
3112 }
3113 }
3114
3115 static void
3116 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3117 {
3118 /* Ingress and Egress stateful Table (Priority 0): Packets are
3119 * allowed by default. */
3120 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3121 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3122
3123 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3124 * committed to conntrack. We always set ct_label.blocked to 0 here as
3125 * any packet that makes it this far is part of a connection we
3126 * want to allow to continue. */
3127 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3128 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3129 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3130 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
3131
3132 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3133 * through nat (without committing).
3134 *
3135 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3136 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3137 * don't overlap.
3138 */
3139 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3140 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3141 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3142 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3143
3144 /* Load balancing rules for new connections get committed to conntrack
3145 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3146 * a higher priority rule for load balancing below also commits the
3147 * connection, so it is okay if we do not hit the above match on
3148 * REGBIT_CONNTRACK_COMMIT. */
3149 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3150 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
3151 struct smap *vips = &lb->vips;
3152 struct smap_node *node;
3153
3154 SMAP_FOR_EACH (node, vips) {
3155 uint16_t port = 0;
3156
3157 /* node->key contains IP:port or just IP. */
3158 char *ip_address = NULL;
3159 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3160 if (!ip_address) {
3161 continue;
3162 }
3163
3164 /* New connections in Ingress table. */
3165 char *action = xasprintf("ct_lb(%s);", node->value);
3166 struct ds match = DS_EMPTY_INITIALIZER;
3167 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
3168 if (port) {
3169 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
3170 ds_put_format(&match, " && udp.dst == %d", port);
3171 } else {
3172 ds_put_format(&match, " && tcp.dst == %d", port);
3173 }
3174 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3175 120, ds_cstr(&match), action);
3176 } else {
3177 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3178 110, ds_cstr(&match), action);
3179 }
3180
3181 free(ip_address);
3182 ds_destroy(&match);
3183 free(action);
3184 }
3185 }
3186 }
3187
3188 static void
3189 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
3190 struct hmap *lflows, struct hmap *mcgroups)
3191 {
3192 /* This flow table structure is documented in ovn-northd(8), so please
3193 * update ovn-northd.8.xml if you change anything. */
3194
3195 struct ds match = DS_EMPTY_INITIALIZER;
3196 struct ds actions = DS_EMPTY_INITIALIZER;
3197
3198 /* Build pre-ACL and ACL tables for both ingress and egress.
3199 * Ingress tables 3 through 9. Egress tables 0 through 6. */
3200 struct ovn_datapath *od;
3201 HMAP_FOR_EACH (od, key_node, datapaths) {
3202 if (!od->nbs) {
3203 continue;
3204 }
3205
3206 build_pre_acls(od, lflows);
3207 build_pre_lb(od, lflows);
3208 build_pre_stateful(od, lflows);
3209 build_acls(od, lflows);
3210 build_qos(od, lflows);
3211 build_lb(od, lflows);
3212 build_stateful(od, lflows);
3213 }
3214
3215 /* Logical switch ingress table 0: Admission control framework (priority
3216 * 100). */
3217 HMAP_FOR_EACH (od, key_node, datapaths) {
3218 if (!od->nbs) {
3219 continue;
3220 }
3221
3222 /* Logical VLANs not supported. */
3223 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
3224 "drop;");
3225
3226 /* Broadcast/multicast source address is invalid. */
3227 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
3228 "drop;");
3229
3230 /* Port security flows have priority 50 (see below) and will continue
3231 * to the next table if packet source is acceptable. */
3232 }
3233
3234 /* Logical switch ingress table 0: Ingress port security - L2
3235 * (priority 50).
3236 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
3237 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
3238 */
3239 struct ovn_port *op;
3240 HMAP_FOR_EACH (op, key_node, ports) {
3241 if (!op->nbsp) {
3242 continue;
3243 }
3244
3245 if (!lsp_is_enabled(op->nbsp)) {
3246 /* Drop packets from disabled logical ports (since logical flow
3247 * tables are default-drop). */
3248 continue;
3249 }
3250
3251 ds_clear(&match);
3252 ds_clear(&actions);
3253 ds_put_format(&match, "inport == %s", op->json_key);
3254 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
3255 &match);
3256
3257 const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
3258 if (queue_id) {
3259 ds_put_format(&actions, "set_queue(%s); ", queue_id);
3260 }
3261 ds_put_cstr(&actions, "next;");
3262 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
3263 ds_cstr(&match), ds_cstr(&actions));
3264
3265 if (op->nbsp->n_port_security) {
3266 build_port_security_ip(P_IN, op, lflows);
3267 build_port_security_nd(op, lflows);
3268 }
3269 }
3270
3271 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
3272 * (priority 0)*/
3273 HMAP_FOR_EACH (od, key_node, datapaths) {
3274 if (!od->nbs) {
3275 continue;
3276 }
3277
3278 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
3279 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
3280 }
3281
3282 /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
3283 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
3284 * rationale. */
3285 HMAP_FOR_EACH (op, key_node, ports) {
3286 if (!op->nbsp) {
3287 continue;
3288 }
3289
3290 if ((!strcmp(op->nbsp->type, "localnet")) ||
3291 (!strcmp(op->nbsp->type, "vtep"))) {
3292 ds_clear(&match);
3293 ds_put_format(&match, "inport == %s", op->json_key);
3294 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3295 ds_cstr(&match), "next;");
3296 }
3297 }
3298
3299 /* Ingress table 10: ARP/ND responder, reply for known IPs.
3300 * (priority 50). */
3301 HMAP_FOR_EACH (op, key_node, ports) {
3302 if (!op->nbsp) {
3303 continue;
3304 }
3305
3306 /*
3307 * Add ARP/ND reply flows if either the
3308 * - port is up or
3309 * - port type is router or
3310 * - port type is localport
3311 */
3312 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
3313 strcmp(op->nbsp->type, "localport")) {
3314 continue;
3315 }
3316
3317 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
3318 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
3319 ds_clear(&match);
3320 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
3321 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
3322 ds_clear(&actions);
3323 ds_put_format(&actions,
3324 "eth.dst = eth.src; "
3325 "eth.src = %s; "
3326 "arp.op = 2; /* ARP reply */ "
3327 "arp.tha = arp.sha; "
3328 "arp.sha = %s; "
3329 "arp.tpa = arp.spa; "
3330 "arp.spa = %s; "
3331 "outport = inport; "
3332 "flags.loopback = 1; "
3333 "output;",
3334 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
3335 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
3336 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
3337 ds_cstr(&match), ds_cstr(&actions));
3338
3339 /* Do not reply to an ARP request from the port that owns the
3340 * address (otherwise a DHCP client that ARPs to check for a
3341 * duplicate address will fail). Instead, forward it the usual
3342 * way.
3343 *
3344 * (Another alternative would be to simply drop the packet. If
3345 * everything is working as it is configured, then this would
3346 * produce equivalent results, since no one should reply to the
3347 * request. But ARPing for one's own IP address is intended to
3348 * detect situations where the network is not working as
3349 * configured, so dropping the request would frustrate that
3350 * intent.) */
3351 ds_put_format(&match, " && inport == %s", op->json_key);
3352 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3353 ds_cstr(&match), "next;");
3354 }
3355
3356 /* For ND solicitations, we need to listen for both the
3357 * unicast IPv6 address and its all-nodes multicast address,
3358 * but always respond with the unicast IPv6 address. */
3359 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3360 ds_clear(&match);
3361 ds_put_format(&match,
3362 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
3363 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3364 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
3365 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
3366
3367 ds_clear(&actions);
3368 ds_put_format(&actions,
3369 "nd_na { "
3370 "eth.src = %s; "
3371 "ip6.src = %s; "
3372 "nd.target = %s; "
3373 "nd.tll = %s; "
3374 "outport = inport; "
3375 "flags.loopback = 1; "
3376 "output; "
3377 "};",
3378 op->lsp_addrs[i].ea_s,
3379 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3380 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3381 op->lsp_addrs[i].ea_s);
3382 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
3383 ds_cstr(&match), ds_cstr(&actions));
3384
3385 /* Do not reply to a solicitation from the port that owns the
3386 * address (otherwise DAD detection will fail). */
3387 ds_put_format(&match, " && inport == %s", op->json_key);
3388 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3389 ds_cstr(&match), "next;");
3390 }
3391 }
3392 }
3393
3394 /* Ingress table 10: ARP/ND responder, by default goto next.
3395 * (priority 0)*/
3396 HMAP_FOR_EACH (od, key_node, datapaths) {
3397 if (!od->nbs) {
3398 continue;
3399 }
3400
3401 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
3402 }
3403
3404 /* Logical switch ingress table 11 and 12: DHCP options and response
3405 * priority 100 flows. */
3406 HMAP_FOR_EACH (op, key_node, ports) {
3407 if (!op->nbsp) {
3408 continue;
3409 }
3410
3411 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
3412 /* Don't add the DHCP flows if the port is not enabled or if the
3413 * port is a router port. */
3414 continue;
3415 }
3416
3417 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
3418 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
3419 */
3420 continue;
3421 }
3422
3423 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
3424 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
3425 struct ds options_action = DS_EMPTY_INITIALIZER;
3426 struct ds response_action = DS_EMPTY_INITIALIZER;
3427 struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
3428 if (build_dhcpv4_action(
3429 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
3430 &options_action, &response_action, &ipv4_addr_match)) {
3431 struct ds match = DS_EMPTY_INITIALIZER;
3432 ds_put_format(
3433 &match, "inport == %s && eth.src == %s && "
3434 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
3435 "udp.src == 68 && udp.dst == 67", op->json_key,
3436 op->lsp_addrs[i].ea_s);
3437
3438 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3439 100, ds_cstr(&match),
3440 ds_cstr(&options_action));
3441 ds_clear(&match);
3442 /* Allow ip4.src = OFFER_IP and
3443 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
3444 * cases
3445 * - When the client wants to renew the IP by sending
3446 * the DHCPREQUEST to the server ip.
3447 * - When the client wants to renew the IP by
3448 * broadcasting the DHCPREQUEST.
3449 */
3450 ds_put_format(
3451 &match, "inport == %s && eth.src == %s && "
3452 "%s && udp.src == 68 && udp.dst == 67", op->json_key,
3453 op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));
3454
3455 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3456 100, ds_cstr(&match),
3457 ds_cstr(&options_action));
3458 ds_clear(&match);
3459
3460 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
3461 * put_dhcp_opts action is successful. */
3462 ds_put_format(
3463 &match, "inport == %s && eth.src == %s && "
3464 "ip4 && udp.src == 68 && udp.dst == 67"
3465 " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
3466 op->lsp_addrs[i].ea_s);
3467 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
3468 100, ds_cstr(&match),
3469 ds_cstr(&response_action));
3470 ds_destroy(&match);
3471 ds_destroy(&options_action);
3472 ds_destroy(&response_action);
3473 ds_destroy(&ipv4_addr_match);
3474 break;
3475 }
3476 }
3477
3478 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3479 struct ds options_action = DS_EMPTY_INITIALIZER;
3480 struct ds response_action = DS_EMPTY_INITIALIZER;
3481 if (build_dhcpv6_action(
3482 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
3483 &options_action, &response_action)) {
3484 struct ds match = DS_EMPTY_INITIALIZER;
3485 ds_put_format(
3486 &match, "inport == %s && eth.src == %s"
3487 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
3488 " udp.dst == 547", op->json_key,
3489 op->lsp_addrs[i].ea_s);
3490
3491 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
3492 ds_cstr(&match), ds_cstr(&options_action));
3493
3494 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
3495 * put_dhcpv6_opts action is successful */
3496 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
3497 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
3498 ds_cstr(&match), ds_cstr(&response_action));
3499 ds_destroy(&match);
3500 ds_destroy(&options_action);
3501 ds_destroy(&response_action);
3502 break;
3503 }
3504 }
3505 }
3506 }
3507
3508 /* Logical switch ingress table 13 and 14: DNS lookup and response
3509 * priority 100 flows.
3510 */
3511 HMAP_FOR_EACH (od, key_node, datapaths) {
3512 if (!od->nbs || !ls_has_dns_records(od->nbs)) {
3513 continue;
3514 }
3515
3516 struct ds match;
3517 struct ds action;
3518 ds_init(&match);
3519 ds_init(&action);
3520 ds_put_cstr(&match, "udp.dst == 53");
3521 ds_put_format(&action,
3522 REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
3523 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
3524 ds_cstr(&match), ds_cstr(&action));
3525 ds_clear(&action);
3526 ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
3527 ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
3528 "udp.dst = udp.src; udp.src = 53; outport = inport; "
3529 "flags.loopback = 1; output;");
3530 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
3531 ds_cstr(&match), ds_cstr(&action));
3532 ds_clear(&action);
3533 ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
3534 "udp.dst = udp.src; udp.src = 53; outport = inport; "
3535 "flags.loopback = 1; output;");
3536 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
3537 ds_cstr(&match), ds_cstr(&action));
3538 ds_destroy(&match);
3539 ds_destroy(&action);
3540 }
3541
3542 /* Ingress table 11 and 12: DHCP options and response, by default goto next.
3543 * (priority 0).
3544 * Ingress table 13 and 14: DNS lookup and response, by default goto next.
3545 * (priority 0).*/
3546
3547 HMAP_FOR_EACH (od, key_node, datapaths) {
3548 if (!od->nbs) {
3549 continue;
3550 }
3551
3552 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
3553 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
3554 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
3555 ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
3556 }
3557
3558 /* Ingress table 15: Destination lookup, broadcast and multicast handling
3559 * (priority 100). */
3560 HMAP_FOR_EACH (op, key_node, ports) {
3561 if (!op->nbsp) {
3562 continue;
3563 }
3564
3565 if (lsp_is_enabled(op->nbsp)) {
3566 ovn_multicast_add(mcgroups, &mc_flood, op);
3567 }
3568 }
3569 HMAP_FOR_EACH (od, key_node, datapaths) {
3570 if (!od->nbs) {
3571 continue;
3572 }
3573
3574 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
3575 "outport = \""MC_FLOOD"\"; output;");
3576 }
3577
3578 /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
3579 HMAP_FOR_EACH (op, key_node, ports) {
3580 if (!op->nbsp) {
3581 continue;
3582 }
3583
3584 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
3585 /* Addresses are owned by the logical port.
3586 * Ethernet address followed by zero or more IPv4
3587 * or IPv6 addresses (or both). */
3588 struct eth_addr mac;
3589 if (ovs_scan(op->nbsp->addresses[i],
3590 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
3591 ds_clear(&match);
3592 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3593 ETH_ADDR_ARGS(mac));
3594
3595 ds_clear(&actions);
3596 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3597 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
3598 ds_cstr(&match), ds_cstr(&actions));
3599 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
3600 if (lsp_is_enabled(op->nbsp)) {
3601 ovn_multicast_add(mcgroups, &mc_unknown, op);
3602 op->od->has_unknown = true;
3603 }
3604 } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
3605 if (!op->nbsp->dynamic_addresses
3606 || !ovs_scan(op->nbsp->dynamic_addresses,
3607 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
3608 continue;
3609 }
3610 ds_clear(&match);
3611 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3612 ETH_ADDR_ARGS(mac));
3613
3614 ds_clear(&actions);
3615 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3616 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
3617 ds_cstr(&match), ds_cstr(&actions));
3618 } else if (!strcmp(op->nbsp->addresses[i], "router")) {
3619 if (!op->peer || !op->peer->nbrp
3620 || !ovs_scan(op->peer->nbrp->mac,
3621 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
3622 continue;
3623 }
3624 ds_clear(&match);
3625 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3626 ETH_ADDR_ARGS(mac));
3627 if (op->peer->od->l3dgw_port
3628 && op->peer == op->peer->od->l3dgw_port
3629 && op->peer->od->l3redirect_port) {
3630 /* The destination lookup flow for the router's
3631 * distributed gateway port MAC address should only be
3632 * programmed on the "redirect-chassis". */
3633 ds_put_format(&match, " && is_chassis_resident(%s)",
3634 op->peer->od->l3redirect_port->json_key);
3635 }
3636
3637 ds_clear(&actions);
3638 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3639 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
3640 ds_cstr(&match), ds_cstr(&actions));
3641
3642 /* Add ethernet addresses specified in NAT rules on
3643 * distributed logical routers. */
3644 if (op->peer->od->l3dgw_port
3645 && op->peer == op->peer->od->l3dgw_port) {
3646 for (int i = 0; i < op->peer->od->nbr->n_nat; i++) {
3647 const struct nbrec_nat *nat
3648 = op->peer->od->nbr->nat[i];
3649 if (!strcmp(nat->type, "dnat_and_snat")
3650 && nat->logical_port && nat->external_mac
3651 && eth_addr_from_string(nat->external_mac, &mac)) {
3652
3653 ds_clear(&match);
3654 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
3655 " && is_chassis_resident(\"%s\")",
3656 ETH_ADDR_ARGS(mac),
3657 nat->logical_port);
3658
3659 ds_clear(&actions);
3660 ds_put_format(&actions, "outport = %s; output;",
3661 op->json_key);
3662 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
3663 50, ds_cstr(&match),
3664 ds_cstr(&actions));
3665 }
3666 }
3667 }
3668 } else {
3669 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3670
3671 VLOG_INFO_RL(&rl,
3672 "%s: invalid syntax '%s' in addresses column",
3673 op->nbsp->name, op->nbsp->addresses[i]);
3674 }
3675 }
3676 }
3677
3678 /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
3679 HMAP_FOR_EACH (od, key_node, datapaths) {
3680 if (!od->nbs) {
3681 continue;
3682 }
3683
3684 if (od->has_unknown) {
3685 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
3686 "outport = \""MC_UNKNOWN"\"; output;");
3687 }
3688 }
3689
3690 /* Egress tables 6: Egress port security - IP (priority 0)
3691 * Egress table 7: Egress port security L2 - multicast/broadcast
3692 * (priority 100). */
3693 HMAP_FOR_EACH (od, key_node, datapaths) {
3694 if (!od->nbs) {
3695 continue;
3696 }
3697
3698 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
3699 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
3700 "output;");
3701 }
3702
3703 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
3704 * if port security enabled.
3705 *
3706 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
3707 *
3708 * Priority 50 rules implement port security for enabled logical port.
3709 *
3710 * Priority 150 rules drop packets to disabled logical ports, so that they
3711 * don't even receive multicast or broadcast packets. */
3712 HMAP_FOR_EACH (op, key_node, ports) {
3713 if (!op->nbsp) {
3714 continue;
3715 }
3716
3717 ds_clear(&match);
3718 ds_put_format(&match, "outport == %s", op->json_key);
3719 if (lsp_is_enabled(op->nbsp)) {
3720 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
3721 &match);
3722 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
3723 ds_cstr(&match), "output;");
3724 } else {
3725 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
3726 ds_cstr(&match), "drop;");
3727 }
3728
3729 if (op->nbsp->n_port_security) {
3730 build_port_security_ip(P_OUT, op, lflows);
3731 }
3732 }
3733
3734 ds_destroy(&match);
3735 ds_destroy(&actions);
3736 }
3737
3738 static bool
3739 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
3740 {
3741 return !lrport->enabled || *lrport->enabled;
3742 }
3743
3744 /* Returns a string of the IP address of the router port 'op' that
3745 * overlaps with 'ip_s". If one is not found, returns NULL.
3746 *
3747 * The caller must not free the returned string. */
3748 static const char *
3749 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
3750 {
3751 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
3752
3753 if (is_ipv4) {
3754 ovs_be32 ip;
3755
3756 if (!ip_parse(ip_s, &ip)) {
3757 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3758 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
3759 return NULL;
3760 }
3761
3762 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3763 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
3764
3765 if (!((na->network ^ ip) & na->mask)) {
3766 /* There should be only 1 interface that matches the
3767 * supplied IP. Otherwise, it's a configuration error,
3768 * because subnets of a router's interfaces should NOT
3769 * overlap. */
3770 return na->addr_s;
3771 }
3772 }
3773 } else {
3774 struct in6_addr ip6;
3775
3776 if (!ipv6_parse(ip_s, &ip6)) {
3777 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3778 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
3779 return NULL;
3780 }
3781
3782 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3783 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
3784 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
3785 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
3786
3787 if (ipv6_is_zero(&and_addr)) {
3788 /* There should be only 1 interface that matches the
3789 * supplied IP. Otherwise, it's a configuration error,
3790 * because subnets of a router's interfaces should NOT
3791 * overlap. */
3792 return na->addr_s;
3793 }
3794 }
3795 }
3796
3797 return NULL;
3798 }
3799
3800 static void
3801 add_route(struct hmap *lflows, const struct ovn_port *op,
3802 const char *lrp_addr_s, const char *network_s, int plen,
3803 const char *gateway, const char *policy)
3804 {
3805 bool is_ipv4 = strchr(network_s, '.') ? true : false;
3806 struct ds match = DS_EMPTY_INITIALIZER;
3807 const char *dir;
3808 uint16_t priority;
3809
3810 if (policy && !strcmp(policy, "src-ip")) {
3811 dir = "src";
3812 priority = plen * 2;
3813 } else {
3814 dir = "dst";
3815 priority = (plen * 2) + 1;
3816 }
3817
3818 /* IPv6 link-local addresses must be scoped to the local router port. */
3819 if (!is_ipv4) {
3820 struct in6_addr network;
3821 ovs_assert(ipv6_parse(network_s, &network));
3822 if (in6_is_lla(&network)) {
3823 ds_put_format(&match, "inport == %s && ", op->json_key);
3824 }
3825 }
3826 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
3827 network_s, plen);
3828
3829 struct ds actions = DS_EMPTY_INITIALIZER;
3830 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
3831
3832 if (gateway) {
3833 ds_put_cstr(&actions, gateway);
3834 } else {
3835 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
3836 }
3837 ds_put_format(&actions, "; "
3838 "%sreg1 = %s; "
3839 "eth.src = %s; "
3840 "outport = %s; "
3841 "flags.loopback = 1; "
3842 "next;",
3843 is_ipv4 ? "" : "xx",
3844 lrp_addr_s,
3845 op->lrp_networks.ea_s,
3846 op->json_key);
3847
3848 /* The priority here is calculated to implement longest-prefix-match
3849 * routing. */
3850 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
3851 ds_cstr(&match), ds_cstr(&actions));
3852 ds_destroy(&match);
3853 ds_destroy(&actions);
3854 }
3855
3856 static void
3857 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
3858 struct hmap *ports,
3859 const struct nbrec_logical_router_static_route *route)
3860 {
3861 ovs_be32 nexthop;
3862 const char *lrp_addr_s = NULL;
3863 unsigned int plen;
3864 bool is_ipv4;
3865
3866 /* Verify that the next hop is an IP address with an all-ones mask. */
3867 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
3868 if (!error) {
3869 if (plen != 32) {
3870 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3871 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3872 return;
3873 }
3874 is_ipv4 = true;
3875 } else {
3876 free(error);
3877
3878 struct in6_addr ip6;
3879 char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
3880 if (!error) {
3881 if (plen != 128) {
3882 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3883 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3884 return;
3885 }
3886 is_ipv4 = false;
3887 } else {
3888 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3889 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
3890 free(error);
3891 return;
3892 }
3893 }
3894
3895 char *prefix_s;
3896 if (is_ipv4) {
3897 ovs_be32 prefix;
3898 /* Verify that ip prefix is a valid IPv4 address. */
3899 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
3900 if (error) {
3901 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3902 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3903 route->ip_prefix);
3904 free(error);
3905 return;
3906 }
3907 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
3908 } else {
3909 /* Verify that ip prefix is a valid IPv6 address. */
3910 struct in6_addr prefix;
3911 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
3912 if (error) {
3913 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3914 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3915 route->ip_prefix);
3916 free(error);
3917 return;
3918 }
3919 struct in6_addr mask = ipv6_create_mask(plen);
3920 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
3921 prefix_s = xmalloc(INET6_ADDRSTRLEN);
3922 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
3923 }
3924
3925 /* Find the outgoing port. */
3926 struct ovn_port *out_port = NULL;
3927 if (route->output_port) {
3928 out_port = ovn_port_find(ports, route->output_port);
3929 if (!out_port) {
3930 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3931 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
3932 route->output_port, route->ip_prefix);
3933 goto free_prefix_s;
3934 }
3935 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3936 if (!lrp_addr_s) {
3937 /* There are no IP networks configured on the router's port via
3938 * which 'route->nexthop' is theoretically reachable. But since
3939 * 'out_port' has been specified, we honor it by trying to reach
3940 * 'route->nexthop' via the first IP address of 'out_port'.
3941 * (There are cases, e.g in GCE, where each VM gets a /32 IP
3942 * address and the default gateway is still reachable from it.) */
3943 if (is_ipv4) {
3944 if (out_port->lrp_networks.n_ipv4_addrs) {
3945 lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
3946 }
3947 } else {
3948 if (out_port->lrp_networks.n_ipv6_addrs) {
3949 lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
3950 }
3951 }
3952 }
3953 } else {
3954 /* output_port is not specified, find the
3955 * router port matching the next hop. */
3956 int i;
3957 for (i = 0; i < od->nbr->n_ports; i++) {
3958 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
3959 out_port = ovn_port_find(ports, lrp->name);
3960 if (!out_port) {
3961 /* This should not happen. */
3962 continue;
3963 }
3964
3965 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3966 if (lrp_addr_s) {
3967 break;
3968 }
3969 }
3970 }
3971
3972 if (!out_port || !lrp_addr_s) {
3973 /* There is no matched out port. */
3974 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3975 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
3976 route->ip_prefix, route->nexthop);
3977 goto free_prefix_s;
3978 }
3979
3980 char *policy = route->policy ? route->policy : "dst-ip";
3981 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
3982 policy);
3983
3984 free_prefix_s:
3985 free(prefix_s);
3986 }
3987
3988 static void
3989 op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
3990 {
3991 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
3992 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
3993 return;
3994 }
3995
3996 ds_put_cstr(ds, "{");
3997 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3998 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
3999 if (add_bcast) {
4000 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4001 }
4002 }
4003 ds_chomp(ds, ' ');
4004 ds_chomp(ds, ',');
4005 ds_put_cstr(ds, "}");
4006 }
4007
4008 static void
4009 op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4010 {
4011 if (op->lrp_networks.n_ipv6_addrs == 1) {
4012 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4013 return;
4014 }
4015
4016 ds_put_cstr(ds, "{");
4017 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4018 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4019 }
4020 ds_chomp(ds, ' ');
4021 ds_chomp(ds, ',');
4022 ds_put_cstr(ds, "}");
4023 }
4024
4025 static const char *
4026 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
4027 {
4028 char *key = xasprintf("%s_force_snat_ip", key_type);
4029 const char *ip_address = smap_get(&od->nbr->options, key);
4030 free(key);
4031
4032 if (ip_address) {
4033 ovs_be32 mask;
4034 char *error = ip_parse_masked(ip_address, ip, &mask);
4035 if (error || mask != OVS_BE32_MAX) {
4036 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4037 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
4038 ip_address, UUID_ARGS(&od->key));
4039 free(error);
4040 *ip = 0;
4041 return NULL;
4042 }
4043 return ip_address;
4044 }
4045
4046 *ip = 0;
4047 return NULL;
4048 }
4049
4050 static void
4051 add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
4052 struct ds *match, struct ds *actions, int priority,
4053 const char *lb_force_snat_ip)
4054 {
4055 /* A match and actions for new connections. */
4056 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
4057 if (lb_force_snat_ip) {
4058 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
4059 ds_cstr(actions));
4060 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4061 new_actions);
4062 free(new_actions);
4063 } else {
4064 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4065 ds_cstr(actions));
4066 }
4067
4068 /* A match and actions for established connections. */
4069 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
4070 if (lb_force_snat_ip) {
4071 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4072 "flags.force_snat_for_lb = 1; ct_dnat;");
4073 } else {
4074 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4075 "ct_dnat;");
4076 }
4077
4078 free(new_match);
4079 free(est_match);
4080 }
4081
4082 static void
4083 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
4084 struct hmap *lflows)
4085 {
4086 /* This flow table structure is documented in ovn-northd(8), so please
4087 * update ovn-northd.8.xml if you change anything. */
4088
4089 struct ds match = DS_EMPTY_INITIALIZER;
4090 struct ds actions = DS_EMPTY_INITIALIZER;
4091
4092 /* Logical router ingress table 0: Admission control framework. */
4093 struct ovn_datapath *od;
4094 HMAP_FOR_EACH (od, key_node, datapaths) {
4095 if (!od->nbr) {
4096 continue;
4097 }
4098
4099 /* Logical VLANs not supported.
4100 * Broadcast/multicast source address is invalid. */
4101 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
4102 "vlan.present || eth.src[40]", "drop;");
4103 }
4104
4105 /* Logical router ingress table 0: match (priority 50). */
4106 struct ovn_port *op;
4107 HMAP_FOR_EACH (op, key_node, ports) {
4108 if (!op->nbrp) {
4109 continue;
4110 }
4111
4112 if (!lrport_is_enabled(op->nbrp)) {
4113 /* Drop packets from disabled logical ports (since logical flow
4114 * tables are default-drop). */
4115 continue;
4116 }
4117
4118 if (op->derived) {
4119 /* No ingress packets should be received on a chassisredirect
4120 * port. */
4121 continue;
4122 }
4123
4124 ds_clear(&match);
4125 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
4126 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4127 ds_cstr(&match), "next;");
4128
4129 ds_clear(&match);
4130 ds_put_format(&match, "eth.dst == %s && inport == %s",
4131 op->lrp_networks.ea_s, op->json_key);
4132 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4133 && op->od->l3redirect_port) {
4134 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
4135 * should only be received on the "redirect-chassis". */
4136 ds_put_format(&match, " && is_chassis_resident(%s)",
4137 op->od->l3redirect_port->json_key);
4138 }
4139 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4140 ds_cstr(&match), "next;");
4141 }
4142
4143 /* Logical router ingress table 1: IP Input. */
4144 HMAP_FOR_EACH (od, key_node, datapaths) {
4145 if (!od->nbr) {
4146 continue;
4147 }
4148
4149 /* L3 admission control: drop multicast and broadcast source, localhost
4150 * source or destination, and zero network source or destination
4151 * (priority 100). */
4152 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
4153 "ip4.mcast || "
4154 "ip4.src == 255.255.255.255 || "
4155 "ip4.src == 127.0.0.0/8 || "
4156 "ip4.dst == 127.0.0.0/8 || "
4157 "ip4.src == 0.0.0.0/8 || "
4158 "ip4.dst == 0.0.0.0/8",
4159 "drop;");
4160
4161 /* ARP reply handling. Use ARP replies to populate the logical
4162 * router's ARP table. */
4163 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
4164 "put_arp(inport, arp.spa, arp.sha);");
4165
4166 /* Drop Ethernet local broadcast. By definition this traffic should
4167 * not be forwarded.*/
4168 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
4169 "eth.bcast", "drop;");
4170
4171 /* TTL discard.
4172 *
4173 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
4174 ds_clear(&match);
4175 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
4176 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
4177 ds_cstr(&match), "drop;");
4178
4179 /* ND advertisement handling. Use advertisements to populate
4180 * the logical router's ARP/ND table. */
4181 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
4182 "put_nd(inport, nd.target, nd.tll);");
4183
4184 /* Learn from neighbor solicitations that were not directed at
4185 * us. (A priority-90 flow will respond to requests to us and
4186 * learn the sender's mac address.) */
4187 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
4188 "put_nd(inport, ip6.src, nd.sll);");
4189
4190 /* Pass other traffic not already handled to the next table for
4191 * routing. */
4192 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
4193 }
4194
4195 /* Logical router ingress table 1: IP Input for IPv4. */
4196 HMAP_FOR_EACH (op, key_node, ports) {
4197 if (!op->nbrp) {
4198 continue;
4199 }
4200
4201 if (op->derived) {
4202 /* No ingress packets are accepted on a chassisredirect
4203 * port, so no need to program flows for that port. */
4204 continue;
4205 }
4206
4207 if (op->lrp_networks.n_ipv4_addrs) {
4208 /* L3 admission control: drop packets that originate from an
4209 * IPv4 address owned by the router or a broadcast address
4210 * known to the router (priority 100). */
4211 ds_clear(&match);
4212 ds_put_cstr(&match, "ip4.src == ");
4213 op_put_v4_networks(&match, op, true);
4214 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
4215 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4216 ds_cstr(&match), "drop;");
4217
4218 /* ICMP echo reply. These flows reply to ICMP echo requests
4219 * received for the router's IP address. Since packets only
4220 * get here as part of the logical router datapath, the inport
4221 * (i.e. the incoming locally attached net) does not matter.
4222 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
4223 ds_clear(&match);
4224 ds_put_cstr(&match, "ip4.dst == ");
4225 op_put_v4_networks(&match, op, false);
4226 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
4227
4228 ds_clear(&actions);
4229 ds_put_format(&actions,
4230 "ip4.dst <-> ip4.src; "
4231 "ip.ttl = 255; "
4232 "icmp4.type = 0; "
4233 "flags.loopback = 1; "
4234 "next; ");
4235 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4236 ds_cstr(&match), ds_cstr(&actions));
4237 }
4238
4239 /* ARP reply. These flows reply to ARP requests for the router's own
4240 * IP address. */
4241 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4242 ds_clear(&match);
4243 ds_put_format(&match,
4244 "inport == %s && arp.tpa == %s && arp.op == 1",
4245 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
4246 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4247 && op->od->l3redirect_port) {
4248 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4249 * should only be sent from the "redirect-chassis", so that
4250 * upstream MAC learning points to the "redirect-chassis".
4251 * Also need to avoid generation of multiple ARP responses
4252 * from different chassis. */
4253 ds_put_format(&match, " && is_chassis_resident(%s)",
4254 op->od->l3redirect_port->json_key);
4255 }
4256
4257 ds_clear(&actions);
4258 ds_put_format(&actions,
4259 "eth.dst = eth.src; "
4260 "eth.src = %s; "
4261 "arp.op = 2; /* ARP reply */ "
4262 "arp.tha = arp.sha; "
4263 "arp.sha = %s; "
4264 "arp.tpa = arp.spa; "
4265 "arp.spa = %s; "
4266 "outport = %s; "
4267 "flags.loopback = 1; "
4268 "output;",
4269 op->lrp_networks.ea_s,
4270 op->lrp_networks.ea_s,
4271 op->lrp_networks.ipv4_addrs[i].addr_s,
4272 op->json_key);
4273 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4274 ds_cstr(&match), ds_cstr(&actions));
4275 }
4276
4277 /* A set to hold all load-balancer vips that need ARP responses. */
4278 struct sset all_ips = SSET_INITIALIZER(&all_ips);
4279 get_router_load_balancer_ips(op->od, &all_ips);
4280
4281 const char *ip_address;
4282 SSET_FOR_EACH(ip_address, &all_ips) {
4283 ovs_be32 ip;
4284 if (!ip_parse(ip_address, &ip) || !ip) {
4285 continue;
4286 }
4287
4288 ds_clear(&match);
4289 ds_put_format(&match,
4290 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
4291 op->json_key, IP_ARGS(ip));
4292
4293 ds_clear(&actions);
4294 ds_put_format(&actions,
4295 "eth.dst = eth.src; "
4296 "eth.src = %s; "
4297 "arp.op = 2; /* ARP reply */ "
4298 "arp.tha = arp.sha; "
4299 "arp.sha = %s; "
4300 "arp.tpa = arp.spa; "
4301 "arp.spa = "IP_FMT"; "
4302 "outport = %s; "
4303 "flags.loopback = 1; "
4304 "output;",
4305 op->lrp_networks.ea_s,
4306 op->lrp_networks.ea_s,
4307 IP_ARGS(ip),
4308 op->json_key);
4309 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4310 ds_cstr(&match), ds_cstr(&actions));
4311 }
4312
4313 sset_destroy(&all_ips);
4314
4315 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
4316 * LBed traffic respectively to be SNATed. In addition, there can be
4317 * a number of SNAT rules in the NAT table. */
4318 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
4319 (op->od->nbr->n_nat + 2));
4320 size_t n_snat_ips = 0;
4321
4322 ovs_be32 snat_ip;
4323 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
4324 &snat_ip);
4325 if (dnat_force_snat_ip) {
4326 snat_ips[n_snat_ips++] = snat_ip;
4327 }
4328
4329 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
4330 &snat_ip);
4331 if (lb_force_snat_ip) {
4332 snat_ips[n_snat_ips++] = snat_ip;
4333 }
4334
4335 for (int i = 0; i < op->od->nbr->n_nat; i++) {
4336 const struct nbrec_nat *nat;
4337
4338 nat = op->od->nbr->nat[i];
4339
4340 ovs_be32 ip;
4341 if (!ip_parse(nat->external_ip, &ip) || !ip) {
4342 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4343 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
4344 "for router %s", nat->external_ip, op->key);
4345 continue;
4346 }
4347
4348 if (!strcmp(nat->type, "snat")) {
4349 snat_ips[n_snat_ips++] = ip;
4350 continue;
4351 }
4352
4353 /* ARP handling for external IP addresses.
4354 *
4355 * DNAT IP addresses are external IP addresses that need ARP
4356 * handling. */
4357 ds_clear(&match);
4358 ds_put_format(&match,
4359 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
4360 op->json_key, IP_ARGS(ip));
4361
4362 ds_clear(&actions);
4363 ds_put_format(&actions,
4364 "eth.dst = eth.src; "
4365 "arp.op = 2; /* ARP reply */ "
4366 "arp.tha = arp.sha; ");
4367
4368 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
4369 struct eth_addr mac;
4370 if (nat->external_mac &&
4371 eth_addr_from_string(nat->external_mac, &mac)
4372 && nat->logical_port) {
4373 /* distributed NAT case, use nat->external_mac */
4374 ds_put_format(&actions,
4375 "eth.src = "ETH_ADDR_FMT"; "
4376 "arp.sha = "ETH_ADDR_FMT"; ",
4377 ETH_ADDR_ARGS(mac),
4378 ETH_ADDR_ARGS(mac));
4379 /* Traffic with eth.src = nat->external_mac should only be
4380 * sent from the chassis where nat->logical_port is
4381 * resident, so that upstream MAC learning points to the
4382 * correct chassis. Also need to avoid generation of
4383 * multiple ARP responses from different chassis. */
4384 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
4385 nat->logical_port);
4386 } else {
4387 ds_put_format(&actions,
4388 "eth.src = %s; "
4389 "arp.sha = %s; ",
4390 op->lrp_networks.ea_s,
4391 op->lrp_networks.ea_s);
4392 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4393 * should only be sent from the "redirect-chassis", so that
4394 * upstream MAC learning points to the "redirect-chassis".
4395 * Also need to avoid generation of multiple ARP responses
4396 * from different chassis. */
4397 if (op->od->l3redirect_port) {
4398 ds_put_format(&match, " && is_chassis_resident(%s)",
4399 op->od->l3redirect_port->json_key);
4400 }
4401 }
4402 } else {
4403 ds_put_format(&actions,
4404 "eth.src = %s; "
4405 "arp.sha = %s; ",
4406 op->lrp_networks.ea_s,
4407 op->lrp_networks.ea_s);
4408 }
4409 ds_put_format(&actions,
4410 "arp.tpa = arp.spa; "
4411 "arp.spa = "IP_FMT"; "
4412 "outport = %s; "
4413 "flags.loopback = 1; "
4414 "output;",
4415 IP_ARGS(ip),
4416 op->json_key);
4417 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4418 ds_cstr(&match), ds_cstr(&actions));
4419 }
4420
4421 ds_clear(&match);
4422 ds_put_cstr(&match, "ip4.dst == {");
4423 bool has_drop_ips = false;
4424 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4425 bool snat_ip_is_router_ip = false;
4426 for (int j = 0; j < n_snat_ips; j++) {
4427 /* Packets to SNAT IPs should not be dropped. */
4428 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
4429 snat_ip_is_router_ip = true;
4430 break;
4431 }
4432 }
4433 if (snat_ip_is_router_ip) {
4434 continue;
4435 }
4436 ds_put_format(&match, "%s, ",
4437 op->lrp_networks.ipv4_addrs[i].addr_s);
4438 has_drop_ips = true;
4439 }
4440 ds_chomp(&match, ' ');
4441 ds_chomp(&match, ',');
4442 ds_put_cstr(&match, "}");
4443
4444 if (has_drop_ips) {
4445 /* Drop IP traffic to this router. */
4446 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
4447 ds_cstr(&match), "drop;");
4448 }
4449
4450 free(snat_ips);
4451 }
4452
4453 /* Logical router ingress table 1: IP Input for IPv6. */
4454 HMAP_FOR_EACH (op, key_node, ports) {
4455 if (!op->nbrp) {
4456 continue;
4457 }
4458
4459 if (op->derived) {
4460 /* No ingress packets are accepted on a chassisredirect
4461 * port, so no need to program flows for that port. */
4462 continue;
4463 }
4464
4465 if (op->lrp_networks.n_ipv6_addrs) {
4466 /* L3 admission control: drop packets that originate from an
4467 * IPv6 address owned by the router (priority 100). */
4468 ds_clear(&match);
4469 ds_put_cstr(&match, "ip6.src == ");
4470 op_put_v6_networks(&match, op);
4471 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4472 ds_cstr(&match), "drop;");
4473
4474 /* ICMPv6 echo reply. These flows reply to echo requests
4475 * received for the router's IP address. */
4476 ds_clear(&match);
4477 ds_put_cstr(&match, "ip6.dst == ");
4478 op_put_v6_networks(&match, op);
4479 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
4480
4481 ds_clear(&actions);
4482 ds_put_cstr(&actions,
4483 "ip6.dst <-> ip6.src; "
4484 "ip.ttl = 255; "
4485 "icmp6.type = 129; "
4486 "flags.loopback = 1; "
4487 "next; ");
4488 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4489 ds_cstr(&match), ds_cstr(&actions));
4490
4491 /* Drop IPv6 traffic to this router. */
4492 ds_clear(&match);
4493 ds_put_cstr(&match, "ip6.dst == ");
4494 op_put_v6_networks(&match, op);
4495 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
4496 ds_cstr(&match), "drop;");
4497 }
4498
4499 /* ND reply. These flows reply to ND solicitations for the
4500 * router's own IP address. */
4501 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4502 ds_clear(&match);
4503 ds_put_format(&match,
4504 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
4505 "&& nd.target == %s",
4506 op->json_key,
4507 op->lrp_networks.ipv6_addrs[i].addr_s,
4508 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
4509 op->lrp_networks.ipv6_addrs[i].addr_s);
4510 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4511 && op->od->l3redirect_port) {
4512 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4513 * should only be sent from the "redirect-chassis", so that
4514 * upstream MAC learning points to the "redirect-chassis".
4515 * Also need to avoid generation of multiple ND replies
4516 * from different chassis. */
4517 ds_put_format(&match, " && is_chassis_resident(%s)",
4518 op->od->l3redirect_port->json_key);
4519 }
4520
4521 ds_clear(&actions);
4522 ds_put_format(&actions,
4523 "put_nd(inport, ip6.src, nd.sll); "
4524 "nd_na { "
4525 "eth.src = %s; "
4526 "ip6.src = %s; "
4527 "nd.target = %s; "
4528 "nd.tll = %s; "
4529 "outport = inport; "
4530 "flags.loopback = 1; "
4531 "output; "
4532 "};",
4533 op->lrp_networks.ea_s,
4534 op->lrp_networks.ipv6_addrs[i].addr_s,
4535 op->lrp_networks.ipv6_addrs[i].addr_s,
4536 op->lrp_networks.ea_s);
4537 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4538 ds_cstr(&match), ds_cstr(&actions));
4539 }
4540 }
4541
4542 /* NAT, Defrag and load balancing. */
4543 HMAP_FOR_EACH (od, key_node, datapaths) {
4544 if (!od->nbr) {
4545 continue;
4546 }
4547
4548 /* Packets are allowed by default. */
4549 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
4550 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
4551 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
4552 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
4553 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
4554 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
4555
4556 /* NAT rules are only valid on Gateway routers and routers with
4557 * l3dgw_port (router has a port with "redirect-chassis"
4558 * specified). */
4559 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
4560 continue;
4561 }
4562
4563 ovs_be32 snat_ip;
4564 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
4565 &snat_ip);
4566 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
4567 &snat_ip);
4568
4569 for (int i = 0; i < od->nbr->n_nat; i++) {
4570 const struct nbrec_nat *nat;
4571
4572 nat = od->nbr->nat[i];
4573
4574 ovs_be32 ip, mask;
4575
4576 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
4577 if (error || mask != OVS_BE32_MAX) {
4578 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4579 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
4580 nat->external_ip);
4581 free(error);
4582 continue;
4583 }
4584
4585 /* Check the validity of nat->logical_ip. 'logical_ip' can
4586 * be a subnet when the type is "snat". */
4587 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
4588 if (!strcmp(nat->type, "snat")) {
4589 if (error) {
4590 static struct vlog_rate_limit rl =
4591 VLOG_RATE_LIMIT_INIT(5, 1);
4592 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
4593 "in router "UUID_FMT"",
4594 nat->logical_ip, UUID_ARGS(&od->key));
4595 free(error);
4596 continue;
4597 }
4598 } else {
4599 if (error || mask != OVS_BE32_MAX) {
4600 static struct vlog_rate_limit rl =
4601 VLOG_RATE_LIMIT_INIT(5, 1);
4602 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
4603 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
4604 free(error);
4605 continue;
4606 }
4607 }
4608
4609 /* For distributed router NAT, determine whether this NAT rule
4610 * satisfies the conditions for distributed NAT processing. */
4611 bool distributed = false;
4612 struct eth_addr mac;
4613 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
4614 nat->logical_port && nat->external_mac) {
4615 if (eth_addr_from_string(nat->external_mac, &mac)) {
4616 distributed = true;
4617 } else {
4618 static struct vlog_rate_limit rl =
4619 VLOG_RATE_LIMIT_INIT(5, 1);
4620 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
4621 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
4622 continue;
4623 }
4624 }
4625
4626 /* Ingress UNSNAT table: It is for already established connections'
4627 * reverse traffic. i.e., SNAT has already been done in egress
4628 * pipeline and now the packet has entered the ingress pipeline as
4629 * part of a reply. We undo the SNAT here.
4630 *
4631 * Undoing SNAT has to happen before DNAT processing. This is
4632 * because when the packet was DNATed in ingress pipeline, it did
4633 * not know about the possibility of eventual additional SNAT in
4634 * egress pipeline. */
4635 if (!strcmp(nat->type, "snat")
4636 || !strcmp(nat->type, "dnat_and_snat")) {
4637 if (!od->l3dgw_port) {
4638 /* Gateway router. */
4639 ds_clear(&match);
4640 ds_put_format(&match, "ip && ip4.dst == %s",
4641 nat->external_ip);
4642 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
4643 ds_cstr(&match), "ct_snat; next;");
4644 } else {
4645 /* Distributed router. */
4646
4647 /* Traffic received on l3dgw_port is subject to NAT. */
4648 ds_clear(&match);
4649 ds_put_format(&match, "ip && ip4.dst == %s"
4650 " && inport == %s",
4651 nat->external_ip,
4652 od->l3dgw_port->json_key);
4653 if (!distributed && od->l3redirect_port) {
4654 /* Flows for NAT rules that are centralized are only
4655 * programmed on the "redirect-chassis". */
4656 ds_put_format(&match, " && is_chassis_resident(%s)",
4657 od->l3redirect_port->json_key);
4658 }
4659 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
4660 ds_cstr(&match), "ct_snat;");
4661
4662 /* Traffic received on other router ports must be
4663 * redirected to the central instance of the l3dgw_port
4664 * for NAT processing. */
4665 ds_clear(&match);
4666 ds_put_format(&match, "ip && ip4.dst == %s",
4667 nat->external_ip);
4668 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
4669 ds_cstr(&match),
4670 REGBIT_NAT_REDIRECT" = 1; next;");
4671 }
4672 }
4673
4674 /* Ingress DNAT table: Packets enter the pipeline with destination
4675 * IP address that needs to be DNATted from a external IP address
4676 * to a logical IP address. */
4677 if (!strcmp(nat->type, "dnat")
4678 || !strcmp(nat->type, "dnat_and_snat")) {
4679 if (!od->l3dgw_port) {
4680 /* Gateway router. */
4681 /* Packet when it goes from the initiator to destination.
4682 * We need to set flags.loopback because the router can
4683 * send the packet back through the same interface. */
4684 ds_clear(&match);
4685 ds_put_format(&match, "ip && ip4.dst == %s",
4686 nat->external_ip);
4687 ds_clear(&actions);
4688 if (dnat_force_snat_ip) {
4689 /* Indicate to the future tables that a DNAT has taken
4690 * place and a force SNAT needs to be done in the
4691 * Egress SNAT table. */
4692 ds_put_format(&actions,
4693 "flags.force_snat_for_dnat = 1; ");
4694 }
4695 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
4696 nat->logical_ip);
4697 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
4698 ds_cstr(&match), ds_cstr(&actions));
4699 } else {
4700 /* Distributed router. */
4701
4702 /* Traffic received on l3dgw_port is subject to NAT. */
4703 ds_clear(&match);
4704 ds_put_format(&match, "ip && ip4.dst == %s"
4705 " && inport == %s",
4706 nat->external_ip,
4707 od->l3dgw_port->json_key);
4708 if (!distributed && od->l3redirect_port) {
4709 /* Flows for NAT rules that are centralized are only
4710 * programmed on the "redirect-chassis". */
4711 ds_put_format(&match, " && is_chassis_resident(%s)",
4712 od->l3redirect_port->json_key);
4713 }
4714 ds_clear(&actions);
4715 ds_put_format(&actions, "ct_dnat(%s);",
4716 nat->logical_ip);
4717 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
4718 ds_cstr(&match), ds_cstr(&actions));
4719
4720 /* Traffic received on other router ports must be
4721 * redirected to the central instance of the l3dgw_port
4722 * for NAT processing. */
4723 ds_clear(&match);
4724 ds_put_format(&match, "ip && ip4.dst == %s",
4725 nat->external_ip);
4726 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
4727 ds_cstr(&match),
4728 REGBIT_NAT_REDIRECT" = 1; next;");
4729 }
4730 }
4731
4732 /* Egress UNDNAT table: It is for already established connections'
4733 * reverse traffic. i.e., DNAT has already been done in ingress
4734 * pipeline and now the packet has entered the egress pipeline as
4735 * part of a reply. We undo the DNAT here.
4736 *
4737 * Note that this only applies for NAT on a distributed router.
4738 * Undo DNAT on a gateway router is done in the ingress DNAT
4739 * pipeline stage. */
4740 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
4741 || !strcmp(nat->type, "dnat_and_snat"))) {
4742 ds_clear(&match);
4743 ds_put_format(&match, "ip && ip4.src == %s"
4744 " && outport == %s",
4745 nat->logical_ip,
4746 od->l3dgw_port->json_key);
4747 if (!distributed && od->l3redirect_port) {
4748 /* Flows for NAT rules that are centralized are only
4749 * programmed on the "redirect-chassis". */
4750 ds_put_format(&match, " && is_chassis_resident(%s)",
4751 od->l3redirect_port->json_key);
4752 }
4753 ds_clear(&actions);
4754 if (distributed) {
4755 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
4756 ETH_ADDR_ARGS(mac));
4757 }
4758 ds_put_format(&actions, "ct_dnat;");
4759 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
4760 ds_cstr(&match), ds_cstr(&actions));
4761 }
4762
4763 /* Egress SNAT table: Packets enter the egress pipeline with
4764 * source ip address that needs to be SNATted to a external ip
4765 * address. */
4766 if (!strcmp(nat->type, "snat")
4767 || !strcmp(nat->type, "dnat_and_snat")) {
4768 if (!od->l3dgw_port) {
4769 /* Gateway router. */
4770 ds_clear(&match);
4771 ds_put_format(&match, "ip && ip4.src == %s",
4772 nat->logical_ip);
4773 ds_clear(&actions);
4774 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
4775
4776 /* The priority here is calculated such that the
4777 * nat->logical_ip with the longest mask gets a higher
4778 * priority. */
4779 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
4780 count_1bits(ntohl(mask)) + 1,
4781 ds_cstr(&match), ds_cstr(&actions));
4782 } else {
4783 /* Distributed router. */
4784 ds_clear(&match);
4785 ds_put_format(&match, "ip && ip4.src == %s"
4786 " && outport == %s",
4787 nat->logical_ip,
4788 od->l3dgw_port->json_key);
4789 if (!distributed && od->l3redirect_port) {
4790 /* Flows for NAT rules that are centralized are only
4791 * programmed on the "redirect-chassis". */
4792 ds_put_format(&match, " && is_chassis_resident(%s)",
4793 od->l3redirect_port->json_key);
4794 }
4795 ds_clear(&actions);
4796 if (distributed) {
4797 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
4798 ETH_ADDR_ARGS(mac));
4799 }
4800 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
4801
4802 /* The priority here is calculated such that the
4803 * nat->logical_ip with the longest mask gets a higher
4804 * priority. */
4805 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
4806 count_1bits(ntohl(mask)) + 1,
4807 ds_cstr(&match), ds_cstr(&actions));
4808 }
4809 }
4810
4811 /* Logical router ingress table 0:
4812 * For NAT on a distributed router, add rules allowing
4813 * ingress traffic with eth.dst matching nat->external_mac
4814 * on the l3dgw_port instance where nat->logical_port is
4815 * resident. */
4816 if (distributed) {
4817 ds_clear(&match);
4818 ds_put_format(&match,
4819 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
4820 " && is_chassis_resident(\"%s\")",
4821 ETH_ADDR_ARGS(mac),
4822 od->l3dgw_port->json_key,
4823 nat->logical_port);
4824 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
4825 ds_cstr(&match), "next;");
4826 }
4827
4828 /* Ingress Gateway Redirect Table: For NAT on a distributed
4829 * router, add flows that are specific to a NAT rule. These
4830 * flows indicate the presence of an applicable NAT rule that
4831 * can be applied in a distributed manner. */
4832 if (distributed) {
4833 ds_clear(&match);
4834 ds_put_format(&match, "ip4.src == %s && outport == %s",
4835 nat->logical_ip,
4836 od->l3dgw_port->json_key);
4837 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
4838 ds_cstr(&match), "next;");
4839 }
4840
4841 /* Egress Loopback table: For NAT on a distributed router.
4842 * If packets in the egress pipeline on the distributed
4843 * gateway port have ip.dst matching a NAT external IP, then
4844 * loop a clone of the packet back to the beginning of the
4845 * ingress pipeline with inport = outport. */
4846 if (od->l3dgw_port) {
4847 /* Distributed router. */
4848 ds_clear(&match);
4849 ds_put_format(&match, "ip4.dst == %s && outport == %s",
4850 nat->external_ip,
4851 od->l3dgw_port->json_key);
4852 ds_clear(&actions);
4853 ds_put_format(&actions,
4854 "clone { ct_clear; "
4855 "inport = outport; outport = \"\"; "
4856 "flags = 0; flags.loopback = 1; ");
4857 for (int i = 0; i < MFF_N_LOG_REGS; i++) {
4858 ds_put_format(&actions, "reg%d = 0; ", i);
4859 }
4860 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
4861 "next(pipeline=ingress, table=0); };");
4862 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
4863 ds_cstr(&match), ds_cstr(&actions));
4864 }
4865 }
4866
4867 /* Handle force SNAT options set in the gateway router. */
4868 if (dnat_force_snat_ip && !od->l3dgw_port) {
4869 /* If a packet with destination IP address as that of the
4870 * gateway router (as set in options:dnat_force_snat_ip) is seen,
4871 * UNSNAT it. */
4872 ds_clear(&match);
4873 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
4874 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
4875 ds_cstr(&match), "ct_snat; next;");
4876
4877 /* Higher priority rules to force SNAT with the IP addresses
4878 * configured in the Gateway router. This only takes effect
4879 * when the packet has already been DNATed once. */
4880 ds_clear(&match);
4881 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
4882 ds_clear(&actions);
4883 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
4884 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
4885 ds_cstr(&match), ds_cstr(&actions));
4886 }
4887 if (lb_force_snat_ip && !od->l3dgw_port) {
4888 /* If a packet with destination IP address as that of the
4889 * gateway router (as set in options:lb_force_snat_ip) is seen,
4890 * UNSNAT it. */
4891 ds_clear(&match);
4892 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
4893 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
4894 ds_cstr(&match), "ct_snat; next;");
4895
4896 /* Load balanced traffic will have flags.force_snat_for_lb set.
4897 * Force SNAT it. */
4898 ds_clear(&match);
4899 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
4900 ds_clear(&actions);
4901 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
4902 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
4903 ds_cstr(&match), ds_cstr(&actions));
4904 }
4905
4906 if (!od->l3dgw_port) {
4907 /* For gateway router, re-circulate every packet through
4908 * the DNAT zone. This helps with two things.
4909 *
4910 * 1. Any packet that needs to be unDNATed in the reverse
4911 * direction gets unDNATed. Ideally this could be done in
4912 * the egress pipeline. But since the gateway router
4913 * does not have any feature that depends on the source
4914 * ip address being external IP address for IP routing,
4915 * we can do it here, saving a future re-circulation.
4916 *
4917 * 2. Any packet that was sent through SNAT zone in the
4918 * previous table automatically gets re-circulated to get
4919 * back the new destination IP address that is needed for
4920 * routing in the openflow pipeline. */
4921 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
4922 "ip", "flags.loopback = 1; ct_dnat;");
4923 } else {
4924 /* For NAT on a distributed router, add flows to Ingress
4925 * IP Routing table, Ingress ARP Resolution table, and
4926 * Ingress Gateway Redirect Table that are not specific to a
4927 * NAT rule. */
4928
4929 /* The highest priority IN_IP_ROUTING rule matches packets
4930 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
4931 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
4932 * will take care of setting the outport. */
4933 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
4934 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
4935
4936 /* The highest priority IN_ARP_RESOLVE rule matches packets
4937 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
4938 * then sets eth.dst to the distributed gateway port's
4939 * ethernet address. */
4940 ds_clear(&actions);
4941 ds_put_format(&actions, "eth.dst = %s; next;",
4942 od->l3dgw_port->lrp_networks.ea_s);
4943 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
4944 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
4945
4946 /* The highest priority IN_GW_REDIRECT rule redirects packets
4947 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
4948 * the central instance of the l3dgw_port for NAT processing. */
4949 ds_clear(&actions);
4950 ds_put_format(&actions, "outport = %s; next;",
4951 od->l3redirect_port->json_key);
4952 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
4953 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
4954 }
4955
4956 /* Load balancing and packet defrag are only valid on
4957 * Gateway routers. */
4958 if (!smap_get(&od->nbr->options, "chassis")) {
4959 continue;
4960 }
4961
4962 /* A set to hold all ips that need defragmentation and tracking. */
4963 struct sset all_ips = SSET_INITIALIZER(&all_ips);
4964
4965 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
4966 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
4967 struct smap *vips = &lb->vips;
4968 struct smap_node *node;
4969
4970 SMAP_FOR_EACH (node, vips) {
4971 uint16_t port = 0;
4972
4973 /* node->key contains IP:port or just IP. */
4974 char *ip_address = NULL;
4975 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
4976 if (!ip_address) {
4977 continue;
4978 }
4979
4980 if (!sset_contains(&all_ips, ip_address)) {
4981 sset_add(&all_ips, ip_address);
4982 }
4983
4984 /* Higher priority rules are added for load-balancing in DNAT
4985 * table. For every match (on a VIP[:port]), we add two flows
4986 * via add_router_lb_flow(). One flow is for specific matching
4987 * on ct.new with an action of "ct_lb($targets);". The other
4988 * flow is for ct.est with an action of "ct_dnat;". */
4989 ds_clear(&actions);
4990 ds_put_format(&actions, "ct_lb(%s);", node->value);
4991
4992 ds_clear(&match);
4993 ds_put_format(&match, "ip && ip4.dst == %s",
4994 ip_address);
4995 free(ip_address);
4996
4997 if (port) {
4998 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
4999 ds_put_format(&match, " && udp && udp.dst == %d",
5000 port);
5001 } else {
5002 ds_put_format(&match, " && tcp && tcp.dst == %d",
5003 port);
5004 }
5005 add_router_lb_flow(lflows, od, &match, &actions, 120,
5006 lb_force_snat_ip);
5007 } else {
5008 add_router_lb_flow(lflows, od, &match, &actions, 110,
5009 lb_force_snat_ip);
5010 }
5011 }
5012 }
5013
5014 /* If there are any load balancing rules, we should send the
5015 * packet to conntrack for defragmentation and tracking. This helps
5016 * with two things.
5017 *
5018 * 1. With tracking, we can send only new connections to pick a
5019 * DNAT ip address from a group.
5020 * 2. If there are L4 ports in load balancing rules, we need the
5021 * defragmentation to match on L4 ports. */
5022 const char *ip_address;
5023 SSET_FOR_EACH(ip_address, &all_ips) {
5024 ds_clear(&match);
5025 ds_put_format(&match, "ip && ip4.dst == %s", ip_address);
5026 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
5027 100, ds_cstr(&match), "ct_next;");
5028 }
5029
5030 sset_destroy(&all_ips);
5031 }
5032
5033 /* Logical router ingress table 5: IP Routing.
5034 *
5035 * A packet that arrives at this table is an IP packet that should be
5036 * routed to the address in 'ip[46].dst'. This table sets outport to
5037 * the correct output port, eth.src to the output port's MAC
5038 * address, and '[xx]reg0' to the next-hop IP address (leaving
5039 * 'ip[46].dst', the packet’s final destination, unchanged), and
5040 * advances to the next table for ARP/ND resolution. */
5041 HMAP_FOR_EACH (op, key_node, ports) {
5042 if (!op->nbrp) {
5043 continue;
5044 }
5045
5046 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5047 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
5048 op->lrp_networks.ipv4_addrs[i].network_s,
5049 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
5050 }
5051
5052 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5053 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
5054 op->lrp_networks.ipv6_addrs[i].network_s,
5055 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
5056 }
5057 }
5058
5059 /* Convert the static routes to flows. */
5060 HMAP_FOR_EACH (od, key_node, datapaths) {
5061 if (!od->nbr) {
5062 continue;
5063 }
5064
5065 for (int i = 0; i < od->nbr->n_static_routes; i++) {
5066 const struct nbrec_logical_router_static_route *route;
5067
5068 route = od->nbr->static_routes[i];
5069 build_static_route_flow(lflows, od, ports, route);
5070 }
5071 }
5072
5073 /* XXX destination unreachable */
5074
5075 /* Local router ingress table 6: ARP Resolution.
5076 *
5077 * Any packet that reaches this table is an IP packet whose next-hop IP
5078 * address is in reg0. (ip4.dst is the final destination.) This table
5079 * resolves the IP address in reg0 into an output port in outport and an
5080 * Ethernet address in eth.dst. */
5081 HMAP_FOR_EACH (op, key_node, ports) {
5082 if (op->nbrp) {
5083 /* This is a logical router port. If next-hop IP address in
5084 * '[xx]reg0' matches IP address of this router port, then
5085 * the packet is intended to eventually be sent to this
5086 * logical port. Set the destination mac address using this
5087 * port's mac address.
5088 *
5089 * The packet is still in peer's logical pipeline. So the match
5090 * should be on peer's outport. */
5091 if (op->peer && op->nbrp->peer) {
5092 if (op->lrp_networks.n_ipv4_addrs) {
5093 ds_clear(&match);
5094 ds_put_format(&match, "outport == %s && reg0 == ",
5095 op->peer->json_key);
5096 op_put_v4_networks(&match, op, false);
5097
5098 ds_clear(&actions);
5099 ds_put_format(&actions, "eth.dst = %s; next;",
5100 op->lrp_networks.ea_s);
5101 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
5102 100, ds_cstr(&match), ds_cstr(&actions));
5103 }
5104
5105 if (op->lrp_networks.n_ipv6_addrs) {
5106 ds_clear(&match);
5107 ds_put_format(&match, "outport == %s && xxreg0 == ",
5108 op->peer->json_key);
5109 op_put_v6_networks(&match, op);
5110
5111 ds_clear(&actions);
5112 ds_put_format(&actions, "eth.dst = %s; next;",
5113 op->lrp_networks.ea_s);
5114 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
5115 100, ds_cstr(&match), ds_cstr(&actions));
5116 }
5117 }
5118 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
5119 /* This is a logical switch port that backs a VM or a container.
5120 * Extract its addresses. For each of the address, go through all
5121 * the router ports attached to the switch (to which this port
5122 * connects) and if the address in question is reachable from the
5123 * router port, add an ARP/ND entry in that router's pipeline. */
5124
5125 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
5126 const char *ea_s = op->lsp_addrs[i].ea_s;
5127 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
5128 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
5129 for (size_t k = 0; k < op->od->n_router_ports; k++) {
5130 /* Get the Logical_Router_Port that the
5131 * Logical_Switch_Port is connected to, as
5132 * 'peer'. */
5133 const char *peer_name = smap_get(
5134 &op->od->router_ports[k]->nbsp->options,
5135 "router-port");
5136 if (!peer_name) {
5137 continue;
5138 }
5139
5140 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5141 if (!peer || !peer->nbrp) {
5142 continue;
5143 }
5144
5145 if (!find_lrp_member_ip(peer, ip_s)) {
5146 continue;
5147 }
5148
5149 ds_clear(&match);
5150 ds_put_format(&match, "outport == %s && reg0 == %s",
5151 peer->json_key, ip_s);
5152
5153 ds_clear(&actions);
5154 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
5155 ovn_lflow_add(lflows, peer->od,
5156 S_ROUTER_IN_ARP_RESOLVE, 100,
5157 ds_cstr(&match), ds_cstr(&actions));
5158 }
5159 }
5160
5161 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
5162 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
5163 for (size_t k = 0; k < op->od->n_router_ports; k++) {
5164 /* Get the Logical_Router_Port that the
5165 * Logical_Switch_Port is connected to, as
5166 * 'peer'. */
5167 const char *peer_name = smap_get(
5168 &op->od->router_ports[k]->nbsp->options,
5169 "router-port");
5170 if (!peer_name) {
5171 continue;
5172 }
5173
5174 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5175 if (!peer || !peer->nbrp) {
5176 continue;
5177 }
5178
5179 if (!find_lrp_member_ip(peer, ip_s)) {
5180 continue;
5181 }
5182
5183 ds_clear(&match);
5184 ds_put_format(&match, "outport == %s && xxreg0 == %s",
5185 peer->json_key, ip_s);
5186
5187 ds_clear(&actions);
5188 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
5189 ovn_lflow_add(lflows, peer->od,
5190 S_ROUTER_IN_ARP_RESOLVE, 100,
5191 ds_cstr(&match), ds_cstr(&actions));
5192 }
5193 }
5194 }
5195 } else if (!strcmp(op->nbsp->type, "router")) {
5196 /* This is a logical switch port that connects to a router. */
5197
5198 /* The peer of this switch port is the router port for which
5199 * we need to add logical flows such that it can resolve
5200 * ARP entries for all the other router ports connected to
5201 * the switch in question. */
5202
5203 const char *peer_name = smap_get(&op->nbsp->options,
5204 "router-port");
5205 if (!peer_name) {
5206 continue;
5207 }
5208
5209 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5210 if (!peer || !peer->nbrp) {
5211 continue;
5212 }
5213
5214 for (size_t i = 0; i < op->od->n_router_ports; i++) {
5215 const char *router_port_name = smap_get(
5216 &op->od->router_ports[i]->nbsp->options,
5217 "router-port");
5218 struct ovn_port *router_port = ovn_port_find(ports,
5219 router_port_name);
5220 if (!router_port || !router_port->nbrp) {
5221 continue;
5222 }
5223
5224 /* Skip the router port under consideration. */
5225 if (router_port == peer) {
5226 continue;
5227 }
5228
5229 if (router_port->lrp_networks.n_ipv4_addrs) {
5230 ds_clear(&match);
5231 ds_put_format(&match, "outport == %s && reg0 == ",
5232 peer->json_key);
5233 op_put_v4_networks(&match, router_port, false);
5234
5235 ds_clear(&actions);
5236 ds_put_format(&actions, "eth.dst = %s; next;",
5237 router_port->lrp_networks.ea_s);
5238 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5239 100, ds_cstr(&match), ds_cstr(&actions));
5240 }
5241
5242 if (router_port->lrp_networks.n_ipv6_addrs) {
5243 ds_clear(&match);
5244 ds_put_format(&match, "outport == %s && xxreg0 == ",
5245 peer->json_key);
5246 op_put_v6_networks(&match, router_port);
5247
5248 ds_clear(&actions);
5249 ds_put_format(&actions, "eth.dst = %s; next;",
5250 router_port->lrp_networks.ea_s);
5251 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5252 100, ds_cstr(&match), ds_cstr(&actions));
5253 }
5254 }
5255 }
5256 }
5257
5258 HMAP_FOR_EACH (od, key_node, datapaths) {
5259 if (!od->nbr) {
5260 continue;
5261 }
5262
5263 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
5264 "get_arp(outport, reg0); next;");
5265
5266 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
5267 "get_nd(outport, xxreg0); next;");
5268 }
5269
5270 /* Logical router ingress table 7: Gateway redirect.
5271 *
5272 * For traffic with outport equal to the l3dgw_port
5273 * on a distributed router, this table redirects a subset
5274 * of the traffic to the l3redirect_port which represents
5275 * the central instance of the l3dgw_port.
5276 */
5277 HMAP_FOR_EACH (od, key_node, datapaths) {
5278 if (!od->nbr) {
5279 continue;
5280 }
5281 if (od->l3dgw_port && od->l3redirect_port) {
5282 /* For traffic with outport == l3dgw_port, if the
5283 * packet did not match any higher priority redirect
5284 * rule, then the traffic is redirected to the central
5285 * instance of the l3dgw_port. */
5286 ds_clear(&match);
5287 ds_put_format(&match, "outport == %s",
5288 od->l3dgw_port->json_key);
5289 ds_clear(&actions);
5290 ds_put_format(&actions, "outport = %s; next;",
5291 od->l3redirect_port->json_key);
5292 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
5293 ds_cstr(&match), ds_cstr(&actions));
5294
5295 /* If the Ethernet destination has not been resolved,
5296 * redirect to the central instance of the l3dgw_port.
5297 * Such traffic will be replaced by an ARP request or ND
5298 * Neighbor Solicitation in the ARP request ingress
5299 * table, before being redirected to the central instance.
5300 */
5301 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
5302 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
5303 ds_cstr(&match), ds_cstr(&actions));
5304 }
5305
5306 /* Packets are allowed by default. */
5307 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
5308 }
5309
5310 /* Local router ingress table 8: ARP request.
5311 *
5312 * In the common case where the Ethernet destination has been resolved,
5313 * this table outputs the packet (priority 0). Otherwise, it composes
5314 * and sends an ARP request (priority 100). */
5315 HMAP_FOR_EACH (od, key_node, datapaths) {
5316 if (!od->nbr) {
5317 continue;
5318 }
5319
5320 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
5321 "eth.dst == 00:00:00:00:00:00",
5322 "arp { "
5323 "eth.dst = ff:ff:ff:ff:ff:ff; "
5324 "arp.spa = reg1; "
5325 "arp.tpa = reg0; "
5326 "arp.op = 1; " /* ARP request */
5327 "output; "
5328 "};");
5329 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
5330 }
5331
5332 /* Logical router egress table 1: Delivery (priority 100).
5333 *
5334 * Priority 100 rules deliver packets to enabled logical ports. */
5335 HMAP_FOR_EACH (op, key_node, ports) {
5336 if (!op->nbrp) {
5337 continue;
5338 }
5339
5340 if (!lrport_is_enabled(op->nbrp)) {
5341 /* Drop packets to disabled logical ports (since logical flow
5342 * tables are default-drop). */
5343 continue;
5344 }
5345
5346 if (op->derived) {
5347 /* No egress packets should be processed in the context of
5348 * a chassisredirect port. The chassisredirect port should
5349 * be replaced by the l3dgw port in the local output
5350 * pipeline stage before egress processing. */
5351 continue;
5352 }
5353
5354 ds_clear(&match);
5355 ds_put_format(&match, "outport == %s", op->json_key);
5356 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
5357 ds_cstr(&match), "output;");
5358 }
5359
5360 ds_destroy(&match);
5361 ds_destroy(&actions);
5362 }
5363
5364 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
5365 * constructing their contents based on the OVN_NB database. */
5366 static void
5367 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
5368 struct hmap *ports)
5369 {
5370 struct hmap lflows = HMAP_INITIALIZER(&lflows);
5371 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
5372
5373 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
5374 build_lrouter_flows(datapaths, ports, &lflows);
5375
5376 /* Push changes to the Logical_Flow table to database. */
5377 const struct sbrec_logical_flow *sbflow, *next_sbflow;
5378 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
5379 struct ovn_datapath *od
5380 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
5381 if (!od) {
5382 sbrec_logical_flow_delete(sbflow);
5383 continue;
5384 }
5385
5386 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
5387 enum ovn_pipeline pipeline
5388 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
5389 struct ovn_lflow *lflow = ovn_lflow_find(
5390 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
5391 sbflow->priority, sbflow->match, sbflow->actions);
5392 if (lflow) {
5393 ovn_lflow_destroy(&lflows, lflow);
5394 } else {
5395 sbrec_logical_flow_delete(sbflow);
5396 }
5397 }
5398 struct ovn_lflow *lflow, *next_lflow;
5399 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
5400 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
5401 uint8_t table = ovn_stage_get_table(lflow->stage);
5402
5403 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
5404 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
5405 sbrec_logical_flow_set_pipeline(
5406 sbflow, pipeline == P_IN ? "ingress" : "egress");
5407 sbrec_logical_flow_set_table_id(sbflow, table);
5408 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
5409 sbrec_logical_flow_set_match(sbflow, lflow->match);
5410 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
5411
5412 /* Trim the source locator lflow->where, which looks something like
5413 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
5414 * last slash, e.g. "ovn-northd.c:1234". */
5415 const char *slash = strrchr(lflow->where, '/');
5416 #if _WIN32
5417 const char *backslash = strrchr(lflow->where, '\\');
5418 if (!slash || backslash > slash) {
5419 slash = backslash;
5420 }
5421 #endif
5422 const char *where = slash ? slash + 1 : lflow->where;
5423
5424 struct smap ids = SMAP_INITIALIZER(&ids);
5425 smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
5426 smap_add(&ids, "source", where);
5427 if (lflow->stage_hint) {
5428 smap_add(&ids, "stage-hint", lflow->stage_hint);
5429 }
5430 sbrec_logical_flow_set_external_ids(sbflow, &ids);
5431 smap_destroy(&ids);
5432
5433 ovn_lflow_destroy(&lflows, lflow);
5434 }
5435 hmap_destroy(&lflows);
5436
5437 /* Push changes to the Multicast_Group table to database. */
5438 const struct sbrec_multicast_group *sbmc, *next_sbmc;
5439 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
5440 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
5441 sbmc->datapath);
5442 if (!od) {
5443 sbrec_multicast_group_delete(sbmc);
5444 continue;
5445 }
5446
5447 struct multicast_group group = { .name = sbmc->name,
5448 .key = sbmc->tunnel_key };
5449 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
5450 if (mc) {
5451 ovn_multicast_update_sbrec(mc, sbmc);
5452 ovn_multicast_destroy(&mcgroups, mc);
5453 } else {
5454 sbrec_multicast_group_delete(sbmc);
5455 }
5456 }
5457 struct ovn_multicast *mc, *next_mc;
5458 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
5459 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
5460 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
5461 sbrec_multicast_group_set_name(sbmc, mc->group->name);
5462 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
5463 ovn_multicast_update_sbrec(mc, sbmc);
5464 ovn_multicast_destroy(&mcgroups, mc);
5465 }
5466 hmap_destroy(&mcgroups);
5467 }
5468
5469 /* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
5470 * We always update OVN_Southbound to match the current data in
5471 * OVN_Northbound, so that the address sets used in Logical_Flows in
5472 * OVN_Southbound is checked against the proper set.*/
5473 static void
5474 sync_address_sets(struct northd_context *ctx)
5475 {
5476 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
5477
5478 const struct sbrec_address_set *sb_address_set;
5479 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
5480 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
5481 }
5482
5483 const struct nbrec_address_set *nb_address_set;
5484 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
5485 sb_address_set = shash_find_and_delete(&sb_address_sets,
5486 nb_address_set->name);
5487 if (!sb_address_set) {
5488 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
5489 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
5490 }
5491
5492 sbrec_address_set_set_addresses(sb_address_set,
5493 /* "char **" is not compatible with "const char **" */
5494 (const char **) nb_address_set->addresses,
5495 nb_address_set->n_addresses);
5496 }
5497
5498 struct shash_node *node, *next;
5499 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
5500 sbrec_address_set_delete(node->data);
5501 shash_delete(&sb_address_sets, node);
5502 }
5503 shash_destroy(&sb_address_sets);
5504 }
5505
5506 /*
5507 * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
5508 * and Southbound db.
5509 */
5510 struct dns_info {
5511 struct hmap_node hmap_node;
5512 const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
5513 const struct sbrec_dns *sb_dns; /* DNS record in the Soutbound db. */
5514
5515 /* Datapaths to which the DNS entry is associated with it. */
5516 const struct sbrec_datapath_binding **sbs;
5517 size_t n_sbs;
5518 };
5519
5520 static inline struct dns_info *
5521 get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
5522 {
5523 struct dns_info *dns_info;
5524 size_t hash = uuid_hash(uuid);
5525 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
5526 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
5527 return dns_info;
5528 }
5529 }
5530
5531 return NULL;
5532 }
5533
5534 static void
5535 sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
5536 {
5537 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
5538 struct ovn_datapath *od;
5539 HMAP_FOR_EACH (od, key_node, datapaths) {
5540 if (!od->nbs || !od->nbs->n_dns_records) {
5541 continue;
5542 }
5543
5544 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
5545 struct dns_info *dns_info = get_dns_info_from_hmap(
5546 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
5547 if (!dns_info) {
5548 size_t hash = uuid_hash(
5549 &od->nbs->dns_records[i]->header_.uuid);
5550 dns_info = xzalloc(sizeof *dns_info);;
5551 dns_info->nb_dns = od->nbs->dns_records[i];
5552 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
5553 }
5554
5555 dns_info->n_sbs++;
5556 dns_info->sbs = xrealloc(dns_info->sbs,
5557 dns_info->n_sbs * sizeof *dns_info->sbs);
5558 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
5559 }
5560 }
5561
5562 const struct sbrec_dns *sbrec_dns, *next;
5563 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
5564 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
5565 struct uuid dns_uuid;
5566 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
5567 sbrec_dns_delete(sbrec_dns);
5568 continue;
5569 }
5570
5571 struct dns_info *dns_info =
5572 get_dns_info_from_hmap(&dns_map, &dns_uuid);
5573 if (dns_info) {
5574 dns_info->sb_dns = sbrec_dns;
5575 } else {
5576 sbrec_dns_delete(sbrec_dns);
5577 }
5578 }
5579
5580 struct dns_info *dns_info;
5581 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
5582 if (!dns_info->sb_dns) {
5583 struct sbrec_dns *sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
5584 dns_info->sb_dns = sbrec_dns;
5585 char *dns_id = xasprintf(
5586 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
5587 const struct smap external_ids =
5588 SMAP_CONST1(&external_ids, "dns_id", dns_id);
5589 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
5590 free(dns_id);
5591 }
5592
5593 /* Set the datapaths and records. If nothing has changed, then
5594 * this will be a no-op.
5595 */
5596 sbrec_dns_set_datapaths(
5597 dns_info->sb_dns,
5598 (struct sbrec_datapath_binding **)dns_info->sbs,
5599 dns_info->n_sbs);
5600 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
5601 free(dns_info->sbs);
5602 free(dns_info);
5603 }
5604 hmap_destroy(&dns_map);
5605 }
5606
5607 \f
5608 static void
5609 ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
5610 {
5611 if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
5612 return;
5613 }
5614 struct hmap datapaths, ports;
5615 build_datapaths(ctx, &datapaths);
5616 build_ports(ctx, &datapaths, &ports);
5617 build_ipam(&datapaths, &ports);
5618 build_lflows(ctx, &datapaths, &ports);
5619
5620 sync_address_sets(ctx);
5621 sync_dns_entries(ctx, &datapaths);
5622
5623 struct ovn_datapath *dp, *next_dp;
5624 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
5625 ovn_datapath_destroy(&datapaths, dp);
5626 }
5627 hmap_destroy(&datapaths);
5628
5629 struct ovn_port *port, *next_port;
5630 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
5631 ovn_port_destroy(&ports, port);
5632 }
5633 hmap_destroy(&ports);
5634
5635 /* Copy nb_cfg from northbound to southbound database.
5636 *
5637 * Also set up to update sb_cfg once our southbound transaction commits. */
5638 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
5639 if (!nb) {
5640 nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
5641 }
5642 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
5643 if (!sb) {
5644 sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
5645 }
5646 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
5647 sb_loop->next_cfg = nb->nb_cfg;
5648
5649 cleanup_macam(&macam);
5650 }
5651
5652 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
5653 * this column is not empty, it means we need to set the corresponding logical
5654 * port as 'up' in the northbound DB. */
5655 static void
5656 update_logical_port_status(struct northd_context *ctx)
5657 {
5658 struct hmap lports_hmap;
5659 const struct sbrec_port_binding *sb;
5660 const struct nbrec_logical_switch_port *nbsp;
5661
5662 struct lport_hash_node {
5663 struct hmap_node node;
5664 const struct nbrec_logical_switch_port *nbsp;
5665 } *hash_node;
5666
5667 hmap_init(&lports_hmap);
5668
5669 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
5670 hash_node = xzalloc(sizeof *hash_node);
5671 hash_node->nbsp = nbsp;
5672 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
5673 }
5674
5675 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
5676 nbsp = NULL;
5677 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5678 hash_string(sb->logical_port, 0),
5679 &lports_hmap) {
5680 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
5681 nbsp = hash_node->nbsp;
5682 break;
5683 }
5684 }
5685
5686 if (!nbsp) {
5687 /* The logical port doesn't exist for this port binding. This can
5688 * happen under normal circumstances when ovn-northd hasn't gotten
5689 * around to pruning the Port_Binding yet. */
5690 continue;
5691 }
5692
5693 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
5694 bool up = true;
5695 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
5696 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
5697 bool up = false;
5698 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
5699 }
5700 }
5701
5702 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
5703 free(hash_node);
5704 }
5705 hmap_destroy(&lports_hmap);
5706 }
5707
5708 static struct dhcp_opts_map supported_dhcp_opts[] = {
5709 OFFERIP,
5710 DHCP_OPT_NETMASK,
5711 DHCP_OPT_ROUTER,
5712 DHCP_OPT_DNS_SERVER,
5713 DHCP_OPT_LOG_SERVER,
5714 DHCP_OPT_LPR_SERVER,
5715 DHCP_OPT_SWAP_SERVER,
5716 DHCP_OPT_POLICY_FILTER,
5717 DHCP_OPT_ROUTER_SOLICITATION,
5718 DHCP_OPT_NIS_SERVER,
5719 DHCP_OPT_NTP_SERVER,
5720 DHCP_OPT_SERVER_ID,
5721 DHCP_OPT_TFTP_SERVER,
5722 DHCP_OPT_CLASSLESS_STATIC_ROUTE,
5723 DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
5724 DHCP_OPT_IP_FORWARD_ENABLE,
5725 DHCP_OPT_ROUTER_DISCOVERY,
5726 DHCP_OPT_ETHERNET_ENCAP,
5727 DHCP_OPT_DEFAULT_TTL,
5728 DHCP_OPT_TCP_TTL,
5729 DHCP_OPT_MTU,
5730 DHCP_OPT_LEASE_TIME,
5731 DHCP_OPT_T1,
5732 DHCP_OPT_T2
5733 };
5734
5735 static struct dhcp_opts_map supported_dhcpv6_opts[] = {
5736 DHCPV6_OPT_IA_ADDR,
5737 DHCPV6_OPT_SERVER_ID,
5738 DHCPV6_OPT_DOMAIN_SEARCH,
5739 DHCPV6_OPT_DNS_SERVER
5740 };
5741
5742 static void
5743 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
5744 {
5745 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
5746 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
5747 sizeof(supported_dhcp_opts[0])); i++) {
5748 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
5749 dhcp_opt_hash(supported_dhcp_opts[i].name));
5750 }
5751
5752 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
5753 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
5754 struct dhcp_opts_map *dhcp_opt =
5755 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
5756 if (dhcp_opt) {
5757 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
5758 } else {
5759 sbrec_dhcp_options_delete(opt_row);
5760 }
5761 }
5762
5763 struct dhcp_opts_map *opt;
5764 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
5765 struct sbrec_dhcp_options *sbrec_dhcp_option =
5766 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
5767 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
5768 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
5769 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
5770 }
5771
5772 hmap_destroy(&dhcp_opts_to_add);
5773 }
5774
5775 static void
5776 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
5777 {
5778 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
5779 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
5780 sizeof(supported_dhcpv6_opts[0])); i++) {
5781 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
5782 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
5783 }
5784
5785 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
5786 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
5787 struct dhcp_opts_map *dhcp_opt =
5788 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
5789 if (dhcp_opt) {
5790 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
5791 } else {
5792 sbrec_dhcpv6_options_delete(opt_row);
5793 }
5794 }
5795
5796 struct dhcp_opts_map *opt;
5797 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
5798 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
5799 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
5800 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
5801 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
5802 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
5803 }
5804
5805 hmap_destroy(&dhcpv6_opts_to_add);
5806 }
5807
/* Column-name lists used to populate 'rbac_perm_cfg' below, one pair per
 * southbound table: '*_auth' fills a permission's "authorization" column and
 * '*_update' fills its "update" column.
 *
 * NOTE(review): an "" authorization entry is presumably the OVSDB RBAC
 * convention for "every client is authorized" -- confirm against the
 * RBAC_Permission documentation in ovn-sb(5). */

/* Chassis. */
static const char *rbac_chassis_auth[] = { "name" };
static const char *rbac_chassis_update[] = {
    "nb_cfg", "external_ids", "encaps", "vtep_logical_switches"
};

/* Encap. */
static const char *rbac_encap_auth[] = { "" };
static const char *rbac_encap_update[] = { "type", "options", "ip" };

/* Port_Binding. */
static const char *rbac_port_binding_auth[] = { "" };
static const char *rbac_port_binding_update[] = { "chassis" };

/* MAC_Binding. */
static const char *rbac_mac_binding_auth[] = { "" };
static const char *rbac_mac_binding_update[] = {
    "logical_port", "ip", "mac", "datapath"
};
5827
5828 static struct rbac_perm_cfg {
5829 const char *table;
5830 const char **auth;
5831 int n_auth;
5832 bool insdel;
5833 const char **update;
5834 int n_update;
5835 const struct sbrec_rbac_permission *row;
5836 } rbac_perm_cfg[] = {
5837 {
5838 .table = "Chassis",
5839 .auth = rbac_chassis_auth,
5840 .n_auth = ARRAY_SIZE(rbac_chassis_auth),
5841 .insdel = true,
5842 .update = rbac_chassis_update,
5843 .n_update = ARRAY_SIZE(rbac_chassis_update),
5844 .row = NULL
5845 },{
5846 .table = "Encap",
5847 .auth = rbac_encap_auth,
5848 .n_auth = ARRAY_SIZE(rbac_encap_auth),
5849 .insdel = true,
5850 .update = rbac_encap_update,
5851 .n_update = ARRAY_SIZE(rbac_encap_update),
5852 .row = NULL
5853 },{
5854 .table = "Port_Binding",
5855 .auth = rbac_port_binding_auth,
5856 .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
5857 .insdel = false,
5858 .update = rbac_port_binding_update,
5859 .n_update = ARRAY_SIZE(rbac_port_binding_update),
5860 .row = NULL
5861 },{
5862 .table = "MAC_Binding",
5863 .auth = rbac_mac_binding_auth,
5864 .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
5865 .insdel = true,
5866 .update = rbac_mac_binding_update,
5867 .n_update = ARRAY_SIZE(rbac_mac_binding_update),
5868 .row = NULL
5869 },{
5870 .table = NULL,
5871 .auth = NULL,
5872 .n_auth = 0,
5873 .insdel = false,
5874 .update = NULL,
5875 .n_update = 0,
5876 .row = NULL
5877 }
5878 };
5879
5880 static bool
5881 ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
5882 {
5883 struct rbac_perm_cfg *pcfg;
5884 int i, j, n_found;
5885
5886 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
5887 if (!strcmp(perm->table, pcfg->table)) {
5888 break;
5889 }
5890 }
5891 if (!pcfg->table) {
5892 return false;
5893 }
5894 if (perm->n_authorization != pcfg->n_auth ||
5895 perm->n_update != pcfg->n_update) {
5896 return false;
5897 }
5898 if (perm->insert_delete != pcfg->insdel) {
5899 return false;
5900 }
5901 /* verify perm->authorization vs. pcfg->auth */
5902 n_found = 0;
5903 for (i = 0; i < pcfg->n_auth; i++) {
5904 for (j = 0; j < perm->n_authorization; j++) {
5905 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
5906 n_found++;
5907 break;
5908 }
5909 }
5910 }
5911 if (n_found != pcfg->n_auth) {
5912 return false;
5913 }
5914
5915 /* verify perm->update vs. pcfg->update */
5916 n_found = 0;
5917 for (i = 0; i < pcfg->n_update; i++) {
5918 for (j = 0; j < perm->n_update; j++) {
5919 if (!strcmp(pcfg->update[i], perm->update[j])) {
5920 n_found++;
5921 break;
5922 }
5923 }
5924 }
5925 if (n_found != pcfg->n_update) {
5926 return false;
5927 }
5928
5929 /* Success, db state matches expected state */
5930 pcfg->row = perm;
5931 return true;
5932 }
5933
5934 static void
5935 ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
5936 struct northd_context *ctx,
5937 const struct sbrec_rbac_role *rbac_role)
5938 {
5939 struct sbrec_rbac_permission *rbac_perm;
5940
5941 rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
5942 sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
5943 sbrec_rbac_permission_set_authorization(rbac_perm,
5944 pcfg->auth,
5945 pcfg->n_auth);
5946 sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
5947 sbrec_rbac_permission_set_update(rbac_perm,
5948 pcfg->update,
5949 pcfg->n_update);
5950 sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
5951 rbac_perm);
5952 }
5953
5954 static void
5955 check_and_update_rbac(struct northd_context *ctx)
5956 {
5957 const struct sbrec_rbac_role *rbac_role = NULL;
5958 const struct sbrec_rbac_permission *perm_row, *perm_next;
5959 const struct sbrec_rbac_role *role_row, *role_row_next;
5960 struct rbac_perm_cfg *pcfg;
5961
5962 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
5963 pcfg->row = NULL;
5964 }
5965
5966 SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
5967 if (!ovn_rbac_validate_perm(perm_row)) {
5968 sbrec_rbac_permission_delete(perm_row);
5969 }
5970 }
5971 SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
5972 if (strcmp(role_row->name, "ovn-controller")) {
5973 sbrec_rbac_role_delete(role_row);
5974 } else {
5975 rbac_role = role_row;
5976 }
5977 }
5978
5979 if (!rbac_role) {
5980 rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
5981 sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
5982 }
5983
5984 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
5985 if (!pcfg->row) {
5986 ovn_rbac_create_perm(pcfg, ctx, rbac_role);
5987 }
5988 }
5989 }
5990
5991 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
5992 static void
5993 update_northbound_cfg(struct northd_context *ctx,
5994 struct ovsdb_idl_loop *sb_loop)
5995 {
5996 /* Update northbound sb_cfg if appropriate. */
5997 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
5998 int64_t sb_cfg = sb_loop->cur_cfg;
5999 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
6000 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
6001 }
6002
6003 /* Update northbound hv_cfg if appropriate. */
6004 if (nbg) {
6005 /* Find minimum nb_cfg among all chassis. */
6006 const struct sbrec_chassis *chassis;
6007 int64_t hv_cfg = nbg->nb_cfg;
6008 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
6009 if (chassis->nb_cfg < hv_cfg) {
6010 hv_cfg = chassis->nb_cfg;
6011 }
6012 }
6013
6014 /* Update hv_cfg. */
6015 if (nbg->hv_cfg != hv_cfg) {
6016 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
6017 }
6018 }
6019 }
6020
6021 /* Handle a fairly small set of changes in the southbound database. */
6022 static void
6023 ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
6024 {
6025 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
6026 return;
6027 }
6028
6029 update_logical_port_status(ctx);
6030 update_northbound_cfg(ctx, sb_loop);
6031 }
6032 \f
6033 static void
6034 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
6035 {
6036 enum {
6037 DAEMON_OPTION_ENUMS,
6038 VLOG_OPTION_ENUMS,
6039 SSL_OPTION_ENUMS,
6040 };
6041 static const struct option long_options[] = {
6042 {"ovnsb-db", required_argument, NULL, 'd'},
6043 {"ovnnb-db", required_argument, NULL, 'D'},
6044 {"help", no_argument, NULL, 'h'},
6045 {"options", no_argument, NULL, 'o'},
6046 {"version", no_argument, NULL, 'V'},
6047 DAEMON_LONG_OPTIONS,
6048 VLOG_LONG_OPTIONS,
6049 STREAM_SSL_LONG_OPTIONS,
6050 {NULL, 0, NULL, 0},
6051 };
6052 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
6053
6054 for (;;) {
6055 int c;
6056
6057 c = getopt_long(argc, argv, short_options, long_options, NULL);
6058 if (c == -1) {
6059 break;
6060 }
6061
6062 switch (c) {
6063 DAEMON_OPTION_HANDLERS;
6064 VLOG_OPTION_HANDLERS;
6065 STREAM_SSL_OPTION_HANDLERS;
6066
6067 case 'd':
6068 ovnsb_db = optarg;
6069 break;
6070
6071 case 'D':
6072 ovnnb_db = optarg;
6073 break;
6074
6075 case 'h':
6076 usage();
6077 exit(EXIT_SUCCESS);
6078
6079 case 'o':
6080 ovs_cmdl_print_options(long_options);
6081 exit(EXIT_SUCCESS);
6082
6083 case 'V':
6084 ovs_print_version(0, 0);
6085 exit(EXIT_SUCCESS);
6086
6087 default:
6088 break;
6089 }
6090 }
6091
6092 if (!ovnsb_db) {
6093 ovnsb_db = default_sb_db();
6094 }
6095
6096 if (!ovnnb_db) {
6097 ovnnb_db = default_nb_db();
6098 }
6099
6100 free(short_options);
6101 }
6102
/* Adds 'column' to 'idl' and then turns off alerting for it via
 * ovsdb_idl_omit_alert().  The order of the two calls is kept as is; the
 * second one adjusts what the first one set up. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    const struct ovsdb_idl_column *col = column;

    ovsdb_idl_add_column(idl, col);
    ovsdb_idl_omit_alert(idl, col);
}
6110
6111 int
6112 main(int argc, char *argv[])
6113 {
6114 int res = EXIT_SUCCESS;
6115 struct unixctl_server *unixctl;
6116 int retval;
6117 bool exiting;
6118
6119 fatal_ignore_sigpipe();
6120 ovs_cmdl_proctitle_init(argc, argv);
6121 set_program_name(argv[0]);
6122 service_start(&argc, &argv);
6123 parse_options(argc, argv);
6124
6125 daemonize_start(false);
6126
6127 retval = unixctl_server_create(NULL, &unixctl);
6128 if (retval) {
6129 exit(EXIT_FAILURE);
6130 }
6131 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
6132
6133 daemonize_complete();
6134
6135 /* We want to detect (almost) all changes to the ovn-nb db. */
6136 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
6137 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
6138 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
6139 ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);
6140
6141 /* We want to detect only selected changes to the ovn-sb db. */
6142 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
6143 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
6144
6145 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
6146 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);
6147
6148 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
6149 add_column_noalert(ovnsb_idl_loop.idl,
6150 &sbrec_logical_flow_col_logical_datapath);
6151 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
6152 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
6153 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
6154 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
6155 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
6156
6157 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
6158 add_column_noalert(ovnsb_idl_loop.idl,
6159 &sbrec_multicast_group_col_datapath);
6160 add_column_noalert(ovnsb_idl_loop.idl,
6161 &sbrec_multicast_group_col_tunnel_key);
6162 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
6163 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
6164
6165 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
6166 add_column_noalert(ovnsb_idl_loop.idl,
6167 &sbrec_datapath_binding_col_tunnel_key);
6168 add_column_noalert(ovnsb_idl_loop.idl,
6169 &sbrec_datapath_binding_col_external_ids);
6170
6171 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
6172 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
6173 add_column_noalert(ovnsb_idl_loop.idl,
6174 &sbrec_port_binding_col_logical_port);
6175 add_column_noalert(ovnsb_idl_loop.idl,
6176 &sbrec_port_binding_col_tunnel_key);
6177 add_column_noalert(ovnsb_idl_loop.idl,
6178 &sbrec_port_binding_col_parent_port);
6179 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
6180 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
6181 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
6182 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
6183 add_column_noalert(ovnsb_idl_loop.idl,
6184 &sbrec_port_binding_col_nat_addresses);
6185 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
6186 add_column_noalert(ovnsb_idl_loop.idl,
6187 &sbrec_port_binding_col_external_ids);
6188 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
6189 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
6190 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
6191 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
6192 add_column_noalert(ovnsb_idl_loop.idl,
6193 &sbrec_mac_binding_col_logical_port);
6194 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
6195 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
6196 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
6197 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
6198 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
6199 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
6200 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
6201 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
6202 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
6203 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
6204 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
6205
6206 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
6207 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
6208 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
6209 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);
6210
6211 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
6212 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
6213 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);
6214
6215 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
6216 add_column_noalert(ovnsb_idl_loop.idl,
6217 &sbrec_rbac_permission_col_table);
6218 add_column_noalert(ovnsb_idl_loop.idl,
6219 &sbrec_rbac_permission_col_authorization);
6220 add_column_noalert(ovnsb_idl_loop.idl,
6221 &sbrec_rbac_permission_col_insert_delete);
6222 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);
6223
6224 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
6225 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
6226
6227 /* Main loop. */
6228 exiting = false;
6229 while (!exiting) {
6230 struct northd_context ctx = {
6231 .ovnnb_idl = ovnnb_idl_loop.idl,
6232 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
6233 .ovnsb_idl = ovnsb_idl_loop.idl,
6234 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
6235 };
6236
6237 ovnnb_db_run(&ctx, &ovnsb_idl_loop);
6238 ovnsb_db_run(&ctx, &ovnsb_idl_loop);
6239 if (ctx.ovnsb_txn) {
6240 check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
6241 check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
6242 check_and_update_rbac(&ctx);
6243 }
6244
6245 unixctl_server_run(unixctl);
6246 unixctl_server_wait(unixctl);
6247 if (exiting) {
6248 poll_immediate_wake();
6249 }
6250 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
6251 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
6252
6253 poll_block();
6254 if (should_service_stop()) {
6255 exiting = true;
6256 }
6257 }
6258
6259 unixctl_server_destroy(unixctl);
6260 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
6261 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
6262 service_stop();
6263
6264 exit(res);
6265 }
6266
6267 static void
6268 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
6269 const char *argv[] OVS_UNUSED, void *exiting_)
6270 {
6271 bool *exiting = exiting_;
6272 *exiting = true;
6273
6274 unixctl_command_reply(conn, NULL);
6275 }