]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
lib: Move lib/poll-loop.h to include/openvswitch
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
b511690b 21#include "bitmap.h"
ac0630a2 22#include "command-line.h"
67d9b930 23#include "daemon.h"
ac0630a2 24#include "dirs.h"
3e8a2ad1 25#include "openvswitch/dynamic-string.h"
ac0630a2 26#include "fatal-signal.h"
4edcdcf4 27#include "hash.h"
ee89ea7b
TW
28#include "openvswitch/hmap.h"
29#include "openvswitch/json.h"
8b2ed684 30#include "ovn/lex.h"
b86f4767 31#include "ovn/lib/chassis-index.h"
06a26dd2 32#include "ovn/lib/logical-fields.h"
16936e4d 33#include "ovn/lib/ovn-l7.h"
e3df8838
BP
34#include "ovn/lib/ovn-nb-idl.h"
35#include "ovn/lib/ovn-sb-idl.h"
218351dd 36#include "ovn/lib/ovn-util.h"
a6095f81 37#include "ovn/actions.h"
064d7f84 38#include "packets.h"
fd016ae3 39#include "openvswitch/poll-loop.h"
5868eb24 40#include "smap.h"
7a15be69 41#include "sset.h"
ac0630a2
RB
42#include "stream.h"
43#include "stream-ssl.h"
7b303ff9 44#include "unixctl.h"
ac0630a2 45#include "util.h"
4edcdcf4 46#include "uuid.h"
ac0630a2
RB
47#include "openvswitch/vlog.h"
48
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* unixctl callback used to make ovn-northd exit cleanly. */
static unixctl_cb_func ovn_northd_exit;

/* Connection state shared by the main loop: one IDL connection and, while a
 * loop iteration is in flight, one open transaction for each of the OVN
 * northbound and southbound databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;       /* OVN_Northbound connection. */
    struct ovsdb_idl *ovnsb_idl;       /* OVN_Southbound connection. */
    struct ovsdb_idl_txn *ovnnb_txn;   /* Current NB transaction, if any. */
    struct ovsdb_idl_txn *ovnsb_txn;   /* Current SB transaction, if any. */
};

/* Database connection strings, taken from the command line or defaults. */
static const char *ovnnb_db;
static const char *ovnsb_db;

/* All IPAM-assigned MACs have the fixed OUI-style prefix 0a:00:00 and a
 * 24-bit suffix drawn from MAC_ADDR_SPACE. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);

/* 2^12: the number of distinct VLAN tags. */
#define MAX_OVN_TAGS 4096
880fcd14
BP
71\f
72/* Pipeline stages. */
ac0630a2 73
880fcd14
BP
/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
85
880fcd14
BP
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.)
 *
 * Layout: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table. */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))

/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY. */
enum ovn_stage {
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")   \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")       \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")        \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")           \
    PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,       7, "ls_in_qos_mark")      \
    PIPELINE_STAGE(SWITCH, IN,  LB,             8, "ls_in_lb")            \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,       9, "ls_in_stateful")      \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,    10, "ls_in_arp_rsp")       \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  11, "ls_in_dhcp_options")  \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
    PIPELINE_STAGE(SWITCH, IN,  DNS_LOOKUP,    13, "ls_in_dns_lookup")    \
    PIPELINE_STAGE(SWITCH, IN,  DNS_RESPONSE,  14, "ls_in_dns_response")  \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       15, "ls_in_l2_lkup")       \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,         0, "ls_out_pre_lb")       \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,        1, "ls_out_pre_acl")      \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL,   2, "ls_out_pre_stateful") \
    PIPELINE_STAGE(SWITCH, OUT, LB,             3, "ls_out_lb")           \
    PIPELINE_STAGE(SWITCH, OUT, ACL,            4, "ls_out_acl")          \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK,       5, "ls_out_qos_mark")     \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,       6, "ls_out_stateful")     \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,    7, "ls_out_port_sec_ip")  \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,    8, "ls_out_port_sec_l2")  \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,      0, "lr_in_admission")     \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,       1, "lr_in_ip_input")      \
    PIPELINE_STAGE(ROUTER, IN,  DEFRAG,         2, "lr_in_defrag")        \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,         3, "lr_in_unsnat")        \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,           4, "lr_in_dnat")          \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_OPTIONS,  5, "lr_in_nd_ra_options") \
    PIPELINE_STAGE(ROUTER, IN,  ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,     7, "lr_in_ip_routing")    \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,    8, "lr_in_arp_resolve")   \
    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,    9, "lr_in_gw_redirect")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,   10, "lr_in_arp_request")   \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, UNDNAT,         0, "lr_out_undnat")       \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,           1, "lr_out_snat")         \
    PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP,       2, "lr_out_egr_loop")     \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,       3, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
156
6bb4a18e
JP
/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* Register definitions specific to switches.  Each names a single bit of
 * logical register reg0, used to carry state between pipeline stages. */
#define REGBIT_CONNTRACK_DEFRAG  "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT  "reg0[1]"
#define REGBIT_CONNTRACK_NAT     "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT  "reg0[3]"
#define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
#define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Indicate that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets with source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
178
880fcd14
BP
179/* Returns an "enum ovn_stage" built from the arguments. */
180static enum ovn_stage
181ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
182 uint8_t table)
183{
184 return OVN_STAGE_BUILD(dp_type, pipeline, table);
185}
186
187/* Returns the pipeline to which 'stage' belongs. */
188static enum ovn_pipeline
189ovn_stage_get_pipeline(enum ovn_stage stage)
190{
191 return (stage >> 8) & 1;
192}
193
194/* Returns the table to which 'stage' belongs. */
195static uint8_t
196ovn_stage_get_table(enum ovn_stage stage)
197{
198 return stage & 0xff;
199}
200
/* Returns a string name for 'stage', e.g. "ls_in_acl", or "<unknown>" for a
 * value outside the PIPELINE_STAGES table.  The cases are generated from the
 * same X-macro list that defines "enum ovn_stage". */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
    case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
9a9961d2
BP
213
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added.  Aborts on a value outside the PIPELINE_STAGES table, since every
 * "enum ovn_stage" value is generated from that table. */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
    case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: OVS_NOT_REACHED();
    }
}
880fcd14 227\f
ac0630a2
RB
/* Prints a usage message for ovn-northd to stdout, including the generic
 * daemon, vlog, and stream option summaries. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
248\f
5868eb24
BP
/* One tunnel ID (datapath or port tunnel key) currently in use.  Kept in an
 * hmap hashed on 'tnlid'. */
struct tnlid_node {
    struct hmap_node hmap_node; /* In the set of in-use tunnel IDs. */
    uint32_t tnlid;             /* The tunnel ID itself. */
};
253
254static void
255destroy_tnlids(struct hmap *tnlids)
4edcdcf4 256{
4ec3d7c7
DDP
257 struct tnlid_node *node;
258 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
259 free(node);
260 }
261 hmap_destroy(tnlids);
262}
263
264static void
265add_tnlid(struct hmap *set, uint32_t tnlid)
266{
267 struct tnlid_node *node = xmalloc(sizeof *node);
268 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
269 node->tnlid = tnlid;
4edcdcf4
RB
270}
271
4edcdcf4 272static bool
5868eb24 273tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 274{
5868eb24
BP
275 const struct tnlid_node *node;
276 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
277 if (node->tnlid == tnlid) {
278 return true;
279 }
280 }
281 return false;
282}
4edcdcf4 283
5868eb24
BP
284static uint32_t
285allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
286 uint32_t *hint)
287{
288 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
289 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
290 if (!tnlid_in_use(set, tnlid)) {
291 add_tnlid(set, tnlid);
292 *hint = tnlid;
293 return tnlid;
294 }
4edcdcf4
RB
295 }
296
5868eb24
BP
297 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
298 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
299 return 0;
300}
301\f
a6095f81
BS
/* One reserved qdisc queue ID on a particular chassis.  Kept in an hmap
 * hashed on 'chassis_uuid'; see allocate_chassis_queueid(). */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* Hashed on uuid_hash(&chassis_uuid). */
    uint32_t queue_id;          /* Reserved queue ID. */
    struct uuid chassis_uuid;   /* Chassis owning the queue. */
};
307
308static void
309destroy_chassis_queues(struct hmap *set)
310{
311 struct ovn_chassis_qdisc_queues *node;
312 HMAP_FOR_EACH_POP (node, key_node, set) {
313 free(node);
314 }
315 hmap_destroy(set);
316}
317
318static void
319add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
320 uint32_t queue_id)
321{
322 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
323 node->queue_id = queue_id;
324 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
325 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
326}
327
328static bool
329chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
330 uint32_t queue_id)
331{
332 const struct ovn_chassis_qdisc_queues *node;
333 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
334 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
335 && node->queue_id == queue_id) {
336 return true;
337 }
338 }
339 return false;
340}
341
342static uint32_t
343allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
344{
345 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
346 queue_id <= QDISC_MAX_QUEUE_ID;
347 queue_id++) {
348 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
349 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
350 return queue_id;
351 }
352 }
353
354 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
355 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
356 return 0;
357}
358
359static void
360free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
361 uint32_t queue_id)
362{
363 struct ovn_chassis_qdisc_queues *node;
364 HMAP_FOR_EACH_WITH_HASH (node, key_node,
365 uuid_hash(&chassis->header_.uuid),
366 set) {
367 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
368 && node->queue_id == queue_id) {
369 hmap_remove(set, &node->key_node);
370 break;
371 }
372 }
373}
374
/* Returns true if 'opts' sets either QoS knob that requires allocating a
 * qdisc queue for the port. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    return smap_get(opts, "qos_max_rate") != NULL
           || smap_get(opts, "qos_burst") != NULL;
}
381\f
161ea2c8
NS
382
/* Per-logical-switch IPAM state, populated by init_ipam_info_for_datapath()
 * from other_config:subnet, other_config:ipv6_prefix, and
 * other_config:exclude_ips. */
struct ipam_info {
    uint32_t start_ipv4;            /* First host address (network + 1). */
    size_t total_ipv4s;             /* Number of addresses tracked. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
    bool ipv6_prefix_set;           /* True if 'ipv6_prefix' parsed OK. */
    struct in6_addr ipv6_prefix;    /* From other_config:ipv6_prefix. */
};
390
9975d7be
BP
/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;     /* Scan hint for ovn_port_allocate_key(). */

    /* NOTE(review): presumably true when some port has an "unknown"
     * address -- confirm against the code that sets it. */
    bool has_unknown;

    /* IPAM data; NULL when the switch configures no subnet/ipv6_prefix. */
    struct ipam_info *ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
    struct ovn_port *localnet_port;
};
425
/* One MAC address in the global "macam" table, hashed on the 64-bit form of
 * 'mac_addr'. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr; /* Allocated MAC address. */
};
430
8639f9be
ND
431static void
432cleanup_macam(struct hmap *macam)
433{
434 struct macam_node *node;
435 HMAP_FOR_EACH_POP (node, hmap_node, macam) {
436 free(node);
437 }
438}
439
5868eb24
BP
440static struct ovn_datapath *
441ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
9975d7be
BP
442 const struct nbrec_logical_switch *nbs,
443 const struct nbrec_logical_router *nbr,
5868eb24
BP
444 const struct sbrec_datapath_binding *sb)
445{
446 struct ovn_datapath *od = xzalloc(sizeof *od);
447 od->key = *key;
448 od->sb = sb;
9975d7be
BP
449 od->nbs = nbs;
450 od->nbr = nbr;
5868eb24
BP
451 hmap_init(&od->port_tnlids);
452 od->port_key_hint = 0;
453 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
454 return od;
455}
456
457static void
458ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
459{
460 if (od) {
461 /* Don't remove od->list. It is used within build_datapaths() as a
462 * private list and once we've exited that function it is not safe to
463 * use it. */
464 hmap_remove(datapaths, &od->key_node);
465 destroy_tnlids(&od->port_tnlids);
161ea2c8
NS
466 if (od->ipam_info) {
467 bitmap_free(od->ipam_info->allocated_ipv4s);
468 free(od->ipam_info);
469 }
86e98048 470 free(od->router_ports);
5868eb24
BP
471 free(od);
472 }
473}
474
9a9961d2
BP
475/* Returns 'od''s datapath type. */
476static enum ovn_datapath_type
477ovn_datapath_get_type(const struct ovn_datapath *od)
478{
479 return od->nbs ? DP_SWITCH : DP_ROUTER;
480}
481
5868eb24
BP
482static struct ovn_datapath *
483ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
484{
485 struct ovn_datapath *od;
486
487 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
488 if (uuid_equals(uuid, &od->key)) {
489 return od;
490 }
491 }
492 return NULL;
493}
494
495static struct ovn_datapath *
496ovn_datapath_from_sbrec(struct hmap *datapaths,
497 const struct sbrec_datapath_binding *sb)
498{
499 struct uuid key;
500
9975d7be
BP
501 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
502 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
503 return NULL;
504 }
505 return ovn_datapath_find(datapaths, &key);
506}
507
5412db30
J
508static bool
509lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
510{
511 return !lrouter->enabled || *lrouter->enabled;
512}
513
161ea2c8
NS
/* Parses 'od''s logical switch IPAM configuration
 * (other_config:{subnet,ipv6_prefix,exclude_ips}) into od->ipam_info.
 * No-op for routers or switches without IPAM configuration, in which case
 * od->ipam_info stays NULL. */
static void
init_ipam_info_for_datapath(struct ovn_datapath *od)
{
    if (!od->nbs) {
        return;
    }

    const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
    const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");

    /* ipam_info is allocated lazily: by the IPv6 branch, by the IPv4 branch
     * below, or not at all. */
    if (ipv6_prefix) {
        od->ipam_info = xzalloc(sizeof *od->ipam_info);
        od->ipam_info->ipv6_prefix_set = ipv6_parse(
            ipv6_prefix, &od->ipam_info->ipv6_prefix);
    }

    if (!subnet_str) {
        return;
    }

    /* A /32 (mask all-ones) or a non-CIDR mask cannot host a dynamic pool. */
    ovs_be32 subnet, mask;
    char *error = ip_parse_masked(subnet_str, &subnet, &mask);
    if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl
            = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
        free(error);
        return;
    }

    if (!od->ipam_info) {
        od->ipam_info = xzalloc(sizeof *od->ipam_info);
    }
    /* Track host addresses from network+1; ~mask is the host-part size. */
    od->ipam_info->start_ipv4 = ntohl(subnet) + 1;
    od->ipam_info->total_ipv4s = ~ntohl(mask);
    od->ipam_info->allocated_ipv4s =
        bitmap_allocate(od->ipam_info->total_ipv4s);

    /* Mark first IP as taken */
    bitmap_set1(od->ipam_info->allocated_ipv4s, 0);

    /* Check if there are any reserved IPs (list) to be excluded from IPAM */
    const char *exclude_ip_list = smap_get(&od->nbs->other_config,
                                           "exclude_ips");
    if (!exclude_ip_list) {
        return;
    }

    struct lexer lexer;
    lexer_init(&lexer, exclude_ip_list);
    /* exclude_ip_list could be in the format -
     * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
     */
    lexer_get(&lexer);
    while (lexer.token.type != LEX_T_END) {
        if (lexer.token.type != LEX_T_INTEGER) {
            lexer_syntax_error(&lexer, "expecting address");
            break;
        }
        uint32_t start = ntohl(lexer.token.value.ipv4);
        lexer_get(&lexer);

        /* 'end' is exclusive: a single address is the range [a, a+1). */
        uint32_t end = start + 1;
        if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
            if (lexer.token.type != LEX_T_INTEGER) {
                lexer_syntax_error(&lexer, "expecting address range");
                break;
            }
            end = ntohl(lexer.token.value.ipv4) + 1;
            lexer_get(&lexer);
        }

        /* Clamp start...end to fit the subnet. */
        start = MAX(od->ipam_info->start_ipv4, start);
        end = MIN(od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s, end);
        if (end > start) {
            bitmap_set_multiple(od->ipam_info->allocated_ipv4s,
                                start - od->ipam_info->start_ipv4,
                                end - start, 1);
        } else {
            lexer_error(&lexer, "excluded addresses not in subnet");
        }
    }
    if (lexer.error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
                     UUID_ARGS(&od->key), lexer.error);
    }
    lexer_destroy(&lexer);
}
604
c5fec4f6
BP
605static void
606ovn_datapath_update_external_ids(struct ovn_datapath *od)
607{
608 /* Get the logical-switch or logical-router UUID to set in
609 * external-ids. */
610 char uuid_s[UUID_LEN + 1];
611 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
612 const char *key = od->nbs ? "logical-switch" : "logical-router";
613
614 /* Get names to set in external-ids. */
615 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
616 const char *name2 = (od->nbs
617 ? smap_get(&od->nbs->external_ids,
618 "neutron:network_name")
619 : smap_get(&od->nbr->external_ids,
620 "neutron:router_name"));
621
622 /* Set external-ids. */
623 struct smap ids = SMAP_INITIALIZER(&ids);
624 smap_add(&ids, key, uuid_s);
625 smap_add(&ids, "name", name);
626 if (name2 && name2[0]) {
627 smap_add(&ids, "name2", name2);
628 }
629 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
630 smap_destroy(&ids);
631}
632
5868eb24
BP
/* Pairs northbound logical switches/routers with southbound
 * Datapath_Binding records, creating a "struct ovn_datapath" for each in
 * 'datapaths' (indexed by northbound UUID).  On return:
 *
 *   - 'sb_only' lists datapaths with only a southbound record (stale; the
 *     caller deletes them).
 *   - 'nb_only' lists datapaths with only a northbound record (new; the
 *     caller creates southbound records for them).
 *   - 'both' lists datapaths present in both databases.
 *
 * Southbound records with missing or duplicate external-ids keys are
 * deleted outright. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every southbound record by the northbound UUID stored in
     * its external-ids.  SAFE iteration because bad records are deleted as
     * we go. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: match northbound logical switches against the indexed
     * southbound records; create entries for new switches. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
            ovn_datapath_update_external_ids(od);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        init_ipam_info_for_datapath(od);
    }

    /* Pass 3: the same for northbound logical routers.  A disabled router
     * gets no datapath at all. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
                ovn_datapath_update_external_ids(od);
            } else {
                /* Can't happen!  Switch and router records share one UUID
                 * namespace in OVN_Northbound. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
720
/* Returns a fresh 24-bit datapath tunnel key not present in 'dp_tnlids', or
 * 0 when the key space is exhausted. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
727
0bac7164
BP
728/* Updates the southbound Datapath_Binding table so that it contains the
729 * logical switches and routers specified by the northbound database.
730 *
731 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
732 * switch and router. */
5868eb24
BP
733static void
734build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
735{
736 struct ovs_list sb_only, nb_only, both;
737
738 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
739
417e7e66 740 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
741 /* First index the in-use datapath tunnel IDs. */
742 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
743 struct ovn_datapath *od;
744 LIST_FOR_EACH (od, list, &both) {
745 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
746 }
747
748 /* Add southbound record for each unmatched northbound record. */
749 LIST_FOR_EACH (od, list, &nb_only) {
750 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
751 if (!tunnel_key) {
752 break;
753 }
754
755 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
c5fec4f6 756 ovn_datapath_update_external_ids(od);
5868eb24
BP
757 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
758 }
759 destroy_tnlids(&dp_tnlids);
760 }
761
762 /* Delete southbound records without northbound matches. */
763 struct ovn_datapath *od, *next;
764 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 765 ovs_list_remove(&od->list);
5868eb24
BP
766 sbrec_datapath_binding_delete(od->sb);
767 ovn_datapath_destroy(datapaths, od);
768 }
769}
770\f
/* A logical switch port or logical router port, joined with its southbound
 * Port_Binding record (when one exists). */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived;   /* Indicates whether this is an additional port
                     * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
807
808static struct ovn_port *
809ovn_port_create(struct hmap *ports, const char *key,
0ee00741
HK
810 const struct nbrec_logical_switch_port *nbsp,
811 const struct nbrec_logical_router_port *nbrp,
5868eb24
BP
812 const struct sbrec_port_binding *sb)
813{
814 struct ovn_port *op = xzalloc(sizeof *op);
9975d7be
BP
815
816 struct ds json_key = DS_EMPTY_INITIALIZER;
817 json_string_escape(key, &json_key);
818 op->json_key = ds_steal_cstr(&json_key);
819
820 op->key = xstrdup(key);
5868eb24 821 op->sb = sb;
0ee00741
HK
822 op->nbsp = nbsp;
823 op->nbrp = nbrp;
41a15b71 824 op->derived = false;
5868eb24
BP
825 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
826 return op;
827}
828
829static void
830ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
831{
832 if (port) {
833 /* Don't remove port->list. It is used within build_ports() as a
834 * private list and once we've exited that function it is not safe to
835 * use it. */
836 hmap_remove(ports, &port->key_node);
e93b43d6
JP
837
838 for (int i = 0; i < port->n_lsp_addrs; i++) {
839 destroy_lport_addresses(&port->lsp_addrs[i]);
840 }
841 free(port->lsp_addrs);
842
843 for (int i = 0; i < port->n_ps_addrs; i++) {
844 destroy_lport_addresses(&port->ps_addrs[i]);
845 }
846 free(port->ps_addrs);
847
4685e523 848 destroy_lport_addresses(&port->lrp_networks);
9975d7be
BP
849 free(port->json_key);
850 free(port->key);
5868eb24
BP
851 free(port);
852 }
853}
854
855static struct ovn_port *
856ovn_port_find(struct hmap *ports, const char *name)
857{
858 struct ovn_port *op;
859
860 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
861 if (!strcmp(op->key, name)) {
862 return op;
863 }
864 }
865 return NULL;
866}
867
868static uint32_t
869ovn_port_allocate_key(struct ovn_datapath *od)
870{
871 return allocate_tnlid(&od->port_tnlids, "port",
872 (1u << 15) - 1, &od->port_key_hint);
873}
874
41a15b71
MS
/* Returns a malloc'd name ("cr-<port_name>") for the chassis-redirect port
 * derived from a distributed gateway port.  The caller must free it. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
880
8639f9be
ND
881static bool
882ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
883{
884 struct macam_node *macam_node;
885 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
886 &macam) {
887 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
888 if (warn) {
889 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
890 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
891 ETH_ADDR_ARGS(macam_node->mac_addr));
892 }
893 return true;
894 }
895 }
896 return false;
897}
898
8639f9be
ND
899static void
900ipam_insert_mac(struct eth_addr *ea, bool check)
901{
902 if (!ea) {
903 return;
904 }
905
906 uint64_t mac64 = eth_addr_to_uint64(*ea);
907 /* If the new MAC was not assigned by this address management system or
908 * check is true and the new MAC is a duplicate, do not insert it into the
909 * macam hmap. */
910 if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
911 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
912 return;
913 }
914
915 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
916 new_macam_node->mac_addr = *ea;
917 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
918}
919
920static void
161ea2c8 921ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
8639f9be 922{
161ea2c8 923 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
8639f9be
ND
924 return;
925 }
926
161ea2c8
NS
927 if (ip >= od->ipam_info->start_ipv4 &&
928 ip < (od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s)) {
929 bitmap_set1(od->ipam_info->allocated_ipv4s,
930 ip - od->ipam_info->start_ipv4);
8639f9be 931 }
8639f9be
ND
932}
933
934static void
935ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
936 char *address)
937{
938 if (!od || !op || !address || !strcmp(address, "unknown")
20418099 939 || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
8639f9be
ND
940 return;
941 }
942
943 struct lport_addresses laddrs;
944 if (!extract_lsp_addresses(address, &laddrs)) {
945 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
946 VLOG_WARN_RL(&rl, "Extract addresses failed.");
947 return;
948 }
949 ipam_insert_mac(&laddrs.ea, true);
950
951 /* IP is only added to IPAM if the switch's subnet option
952 * is set, whereas MAC is always added to MACAM. */
161ea2c8 953 if (!od->ipam_info || !od->ipam_info->allocated_ipv4s) {
8639f9be
ND
954 destroy_lport_addresses(&laddrs);
955 return;
956 }
957
958 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
959 uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
161ea2c8 960 ipam_insert_ip(od, ip);
8639f9be
ND
961 }
962
963 destroy_lport_addresses(&laddrs);
964}
965
966static void
967ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
968{
969 if (!od || !op) {
970 return;
971 }
972
973 if (op->nbsp) {
974 /* Add all the port's addresses to address data structures. */
975 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
976 ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
977 }
978 if (op->nbsp->dynamic_addresses) {
979 ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
980 }
981 } else if (op->nbrp) {
982 struct lport_addresses lrp_networks;
983 if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
984 static struct vlog_rate_limit rl
985 = VLOG_RATE_LIMIT_INIT(1, 1);
986 VLOG_WARN_RL(&rl, "Extract addresses failed.");
987 return;
988 }
989 ipam_insert_mac(&lrp_networks.ea, true);
990
991 if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
992 || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
993 destroy_lport_addresses(&lrp_networks);
994 return;
995 }
996
997 for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
998 uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
161ea2c8 999 ipam_insert_ip(op->peer->od, ip);
8639f9be
ND
1000 }
1001
1002 destroy_lport_addresses(&lrp_networks);
1003 }
1004}
1005
1006static uint64_t
1007ipam_get_unused_mac(void)
1008{
1009 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1010 static uint32_t last_mac;
1011
1012 uint64_t mac64;
1013 struct eth_addr mac;
1014 uint32_t mac_addr_suffix, i;
1015 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
1016 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1017 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
1018 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
1019 eth_addr_from_uint64(mac64, &mac);
1020 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
1021 last_mac = mac_addr_suffix;
1022 break;
1023 }
1024 }
1025
1026 if (i == MAC_ADDR_SPACE) {
1027 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1028 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
1029 mac64 = 0;
1030 }
1031
1032 return mac64;
1033}
1034
1035static uint32_t
161ea2c8 1036ipam_get_unused_ip(struct ovn_datapath *od)
8639f9be 1037{
161ea2c8 1038 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
8639f9be
ND
1039 return 0;
1040 }
1041
161ea2c8
NS
1042 size_t new_ip_index = bitmap_scan(od->ipam_info->allocated_ipv4s, 0, 0,
1043 od->ipam_info->total_ipv4s - 1);
1044 if (new_ip_index == od->ipam_info->total_ipv4s - 1) {
8639f9be
ND
1045 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1046 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
161ea2c8 1047 return 0;
8639f9be
ND
1048 }
1049
161ea2c8 1050 return od->ipam_info->start_ipv4 + new_ip_index;
8639f9be
ND
1051}
1052
1053static bool
1054ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
161ea2c8 1055 const char *addrspec)
8639f9be 1056{
7cc0741e 1057 if (!op->nbsp || !od->ipam_info) {
8639f9be
ND
1058 return false;
1059 }
1060
7cc0741e 1061 /* Get or generate MAC address. */
8639f9be 1062 struct eth_addr mac;
7cc0741e 1063 bool dynamic_mac;
6374d518 1064 int n = 0;
6374d518
LR
1065 if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
1066 ETH_ADDR_SCAN_ARGS(mac), &n)
1067 && addrspec[n] == '\0') {
7cc0741e 1068 dynamic_mac = false;
6374d518
LR
1069 } else {
1070 uint64_t mac64 = ipam_get_unused_mac();
1071 if (!mac64) {
1072 return false;
1073 }
1074 eth_addr_from_uint64(mac64, &mac);
7cc0741e 1075 dynamic_mac = true;
8639f9be 1076 }
8639f9be 1077
7cc0741e
NS
1078 /* Generate IPv4 address, if desirable. */
1079 bool dynamic_ip4 = od->ipam_info->allocated_ipv4s != NULL;
1080 uint32_t ip4 = dynamic_ip4 ? ipam_get_unused_ip(od) : 0;
8639f9be 1081
7cc0741e
NS
1082 /* Generate IPv6 address, if desirable. */
1083 bool dynamic_ip6 = od->ipam_info->ipv6_prefix_set;
1084 struct in6_addr ip6;
1085 if (dynamic_ip6) {
1086 in6_generate_eui64(mac, &od->ipam_info->ipv6_prefix, &ip6);
1087 }
8639f9be 1088
7cc0741e
NS
1089 /* If we didn't generate anything, bail out. */
1090 if (!dynamic_ip4 && !dynamic_ip6) {
1091 return false;
1092 }
1093
1094 /* Save the dynamic addresses. */
1095 struct ds new_addr = DS_EMPTY_INITIALIZER;
1096 ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1097 if (dynamic_ip4 && ip4) {
1098 ipam_insert_ip(od, ip4);
1099 ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(htonl(ip4)));
1100 }
1101 if (dynamic_ip6) {
1102 char ip6_s[INET6_ADDRSTRLEN + 1];
1103 ipv6_string_mapped(ip6_s, &ip6);
1104 ds_put_format(&new_addr, " %s", ip6_s);
1105 }
1106 ipam_insert_mac(&mac, !dynamic_mac);
1107 nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
1108 ds_cstr(&new_addr));
1109 ds_destroy(&new_addr);
8639f9be
ND
1110 return true;
1111}
1112
/* Allocates dynamic MAC/IP addresses for all switch ports that request
 * them, and clears stale dynamic addresses from ports that no longer
 * request any. */
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs || !od->ipam_info) {
            continue;
        }

        struct ovn_port *op;
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            const struct nbrec_logical_switch_port *nbsp =
                od->nbs->ports[i];

            if (!nbsp) {
                continue;
            }

            op = ovn_port_find(ports, nbsp->name);
            if (!op || (op->nbsp && op->peer)) {
                /* Do not allocate addresses for logical switch ports that
                 * have a peer. */
                continue;
            }

            /* At most one "dynamic" entry is honored per port (break after
             * the first one found). */
            for (size_t j = 0; j < nbsp->n_addresses; j++) {
                if (is_dynamic_lsp_address(nbsp->addresses[j])
                    && !nbsp->dynamic_addresses) {
                    /* NOTE(review): this relies on
                     * ipam_allocate_addresses() having updated
                     * nbsp->dynamic_addresses via the IDL setter before
                     * extract_lsp_addresses() reads it — confirm the IDL
                     * writes through to the local row immediately. */
                    if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
                        || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "Failed to allocate address.");
                    } else {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }

            /* No addresses requested at all: drop any leftover dynamic
             * addresses from a previous configuration. */
            if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
                nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                                NULL);
            }
        }
    }
}
1169\f
b511690b
GS
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node;    /* In a tag_alloc_table hmap, hashed on
                                    * 'parent_name'. */
    char *parent_name;             /* Name of the parent switch port; owned
                                    * by this node (freed on destroy). */
    unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
};
1179
1180static void
1181tag_alloc_destroy(struct hmap *tag_alloc_table)
1182{
1183 struct tag_alloc_node *node;
1184 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1185 bitmap_free(node->allocated_tags);
1186 free(node->parent_name);
1187 free(node);
1188 }
1189 hmap_destroy(tag_alloc_table);
1190}
1191
1192static struct tag_alloc_node *
1193tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1194{
1195 /* If a node for the 'parent_name' exists, return it. */
1196 struct tag_alloc_node *tag_alloc_node;
1197 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1198 hash_string(parent_name, 0),
1199 tag_alloc_table) {
1200 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1201 return tag_alloc_node;
1202 }
1203 }
1204
1205 /* Create a new node. */
1206 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1207 tag_alloc_node->parent_name = xstrdup(parent_name);
1208 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1209 /* Tag 0 is invalid for nested containers. */
1210 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1211 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1212 hash_string(parent_name, 0));
1213
1214 return tag_alloc_node;
1215}
1216
1217static void
1218tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1219 const struct nbrec_logical_switch_port *nbsp)
1220{
1221 /* Add the tags of already existing nested containers. If there is no
1222 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1223 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1224 return;
1225 }
1226
1227 struct tag_alloc_node *tag_alloc_node;
1228 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1229 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1230}
1231
1232static void
1233tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1234 const struct nbrec_logical_switch_port *nbsp)
1235{
1236 if (!nbsp->tag_request) {
1237 return;
1238 }
1239
1240 if (nbsp->parent_name && nbsp->parent_name[0]
1241 && *nbsp->tag_request == 0) {
1242 /* For nested containers that need allocation, do the allocation. */
1243
1244 if (nbsp->tag) {
1245 /* This has already been allocated. */
1246 return;
1247 }
1248
1249 struct tag_alloc_node *tag_alloc_node;
1250 int64_t tag;
1251 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1252 nbsp->parent_name);
1253 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1254 if (tag == MAX_OVN_TAGS) {
1255 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1256 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1257 "parent %s", nbsp->parent_name);
1258 return;
1259 }
1260 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1261 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1262 } else if (*nbsp->tag_request != 0) {
1263 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1264 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1265 }
1266}
1267\f
8639f9be 1268
6c4f7a8a
NS
1269/*
1270 * This function checks if the MAC in "address" parameter (if present) is
1271 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1272 * and updates it.
1273 */
1274static void
1275check_and_update_mac_in_dynamic_addresses(
1276 const char *address,
1277 const struct nbrec_logical_switch_port *nbsp)
1278{
1279 if (!nbsp->dynamic_addresses) {
1280 return;
1281 }
1282 int buf_index = 0;
1283 struct eth_addr ea;
1284 if (!ovs_scan_len(address, &buf_index,
1285 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1286 return;
1287 }
1288
1289 struct eth_addr present_ea;
1290 buf_index = 0;
1291 if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index,
1292 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea))
1293 && !eth_addr_equals(ea, present_ea)) {
1294 /* MAC address has changed. Update it */
1295 char *new_addr = xasprintf(
1296 ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea),
1297 &nbsp->dynamic_addresses[buf_index]);
1298 nbrec_logical_switch_port_set_dynamic_addresses(
1299 nbsp, new_addr);
1300 free(new_addr);
1301 }
1302}
1303
5868eb24
BP
/* Joins southbound Port_Binding rows with northbound logical switch and
 * router ports into 'ports' (a map of struct ovn_port), splitting them
 * into three lists:
 *   - 'sb_only': ports with an SB binding but no NB counterpart.
 *   - 'nb_only': NB ports with no SB binding yet.
 *   - 'both':    ports present in both databases.
 * Along the way it records chassis qdisc queue ids, seeds IPAM/MACAM and
 * the container-tag table, creates "derived" chassisredirect ports for
 * gateway router ports, and wires up peer pointers between router ports
 * and switch ports of type "router". */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start by assuming every SB Port_Binding is stale; rows matched
     * against NB ports below are moved from 'sb_only' to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: process its switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                if (!strcmp(nbsp->type, "localnet")) {
                   od->localnet_port = op;
                }

                /* Parse the addresses column; "unknown" and "router"
                 * entries are skipped here ("router" is filled in by the
                 * peer-connection pass at the bottom). */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")
                        || !strcmp(nbsp->addresses[j], "router")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            check_and_update_mac_in_dynamic_addresses(
                                nbsp->addresses[j], nbsp);
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            /* Dynamic address not yet allocated; handled
                             * later by build_ipam(). */
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the port_security column. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: process its router ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);

                const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                        "redirect-chassis");
                if (redirect_chassis || op->nbrp->n_gateway_chassis) {
                    /* Additional "derived" ovn_port crp represents the
                     * instance of op on the "redirect-chassis". */
                    const char *gw_chassis = smap_get(&op->od->nbr->options,
                                                   "chassis");
                    if (gw_chassis) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: "
                                     "redirect-chassis configured on port %s "
                                     "on L3 gateway router", nbrp->name);
                        continue;
                    }
                    if (od->l3dgw_port || od->l3redirect_port) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
                                     "with redirect-chassis on same logical "
                                     "router %s", od->nbr->name);
                        continue;
                    }

                    char *redirect_name = chassis_redirect_name(nbrp->name);
                    struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                    if (crp) {
                        crp->derived = true;
                        crp->nbrp = nbrp;
                        ovs_list_remove(&crp->list);
                        ovs_list_push_back(both, &crp->list);
                    } else {
                        crp = ovn_port_create(ports, redirect_name,
                                              NULL, nbrp, NULL);
                        crp->derived = true;
                        ovs_list_push_back(nb_only, &crp->list);
                    }
                    crp->od = od;
                    free(redirect_name);

                    /* Set l3dgw_port and l3redirect_port in od, for later
                     * use during flow creation. */
                    od->l3dgw_port = op;
                    od->l3redirect_port = crp;
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;

            /* Fill op->lsp_addrs for op->nbsp->addresses[] with
             * contents "router", which was skipped in the loop above. */
            for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
                if (!strcmp(op->nbsp->addresses[j], "router")) {
                    if (extract_lrp_networks(peer->nbrp,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }
        } else if (op->nbrp && op->nbrp->peer && !op->derived) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1565
e914fb54
MS
1566static void
1567ip_address_and_port_from_lb_key(const char *key, char **ip_address,
485d373b 1568 uint16_t *port, int *addr_family);
e914fb54
MS
1569
1570static void
1571get_router_load_balancer_ips(const struct ovn_datapath *od,
485d373b 1572 struct sset *all_ips, int *addr_family)
e914fb54
MS
1573{
1574 if (!od->nbr) {
1575 return;
1576 }
1577
1578 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1579 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1580 struct smap *vips = &lb->vips;
1581 struct smap_node *node;
1582
1583 SMAP_FOR_EACH (node, vips) {
1584 /* node->key contains IP:port or just IP. */
1585 char *ip_address = NULL;
1586 uint16_t port;
1587
485d373b
MM
1588 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
1589 addr_family);
e914fb54
MS
1590 if (!ip_address) {
1591 continue;
1592 }
1593
1594 if (!sset_contains(all_ips, ip_address)) {
1595 sset_add(all_ips, ip_address);
1596 }
1597
1598 free(ip_address);
1599 }
1600 }
1601}
1602
f40c5588
MS
1603/* Returns an array of strings, each consisting of a MAC address followed
1604 * by one or more IP addresses, and if the port is a distributed gateway
1605 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1606 * LPORT_NAME is the name of the L3 redirect port or the name of the
1607 * logical_port specified in a NAT rule. These strings include the
1608 * external IP addresses of all NAT rules defined on that router, and all
1609 * of the IP addresses used in load balancer VIPs defined on that router.
e914fb54 1610 *
f40c5588
MS
1611 * The caller must free each of the n returned strings with free(),
1612 * and must free the returned array when it is no longer needed. */
1613static char **
1614get_nat_addresses(const struct ovn_port *op, size_t *n)
e914fb54 1615{
f40c5588 1616 size_t n_nats = 0;
e914fb54
MS
1617 struct eth_addr mac;
1618 if (!op->nbrp || !op->od || !op->od->nbr
1619 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1620 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
f40c5588 1621 *n = n_nats;
e914fb54
MS
1622 return NULL;
1623 }
1624
f40c5588
MS
1625 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1626 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1627 bool central_ip_address = false;
1628
1629 char **addresses;
1630 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
e914fb54
MS
1631
1632 /* Get NAT IP addresses. */
f40c5588 1633 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
e914fb54
MS
1634 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1635 ovs_be32 ip, mask;
1636
1637 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1638 if (error || mask != OVS_BE32_MAX) {
1639 free(error);
1640 continue;
1641 }
26b9e08d
MS
1642
1643 /* Determine whether this NAT rule satisfies the conditions for
1644 * distributed NAT processing. */
1645 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1646 && nat->logical_port && nat->external_mac) {
1647 /* Distributed NAT rule. */
f40c5588
MS
1648 if (eth_addr_from_string(nat->external_mac, &mac)) {
1649 struct ds address = DS_EMPTY_INITIALIZER;
1650 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1651 ds_put_format(&address, " %s", nat->external_ip);
1652 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1653 nat->logical_port);
1654 addresses[n_nats++] = ds_steal_cstr(&address);
1655 }
26b9e08d
MS
1656 } else {
1657 /* Centralized NAT rule, either on gateway router or distributed
1658 * router. */
f40c5588
MS
1659 ds_put_format(&c_addresses, " %s", nat->external_ip);
1660 central_ip_address = true;
26b9e08d 1661 }
e914fb54
MS
1662 }
1663
1664 /* A set to hold all load-balancer vips. */
1665 struct sset all_ips = SSET_INITIALIZER(&all_ips);
485d373b
MM
1666 int addr_family;
1667 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
e914fb54
MS
1668
1669 const char *ip_address;
1670 SSET_FOR_EACH (ip_address, &all_ips) {
f40c5588
MS
1671 ds_put_format(&c_addresses, " %s", ip_address);
1672 central_ip_address = true;
e914fb54
MS
1673 }
1674 sset_destroy(&all_ips);
1675
f40c5588
MS
1676 if (central_ip_address) {
1677 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1678 * ports should be restricted to the "redirect-chassis". */
1679 if (op->od->l3redirect_port) {
1680 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1681 op->od->l3redirect_port->json_key);
1682 }
1683
1684 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
26b9e08d
MS
1685 }
1686
f40c5588
MS
1687 *n = n_nats;
1688
1689 return addresses;
e914fb54
MS
1690}
1691
b86f4767 1692static bool
1693gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc,
1694 const struct sbrec_chassis *nb_gwc_c,
1695 const struct sbrec_gateway_chassis *sb_gwc)
1696{
79371ff5 1697 bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
1698 && nb_gwc->priority == sb_gwc->priority
1699 && smap_equal(&nb_gwc->options, &sb_gwc->options)
1700 && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
1701
1702 if (!equal) {
1703 return false;
1704 }
1705
1706 /* If everything else matched and we were unable to find the SBDB
1707 * Chassis entry at this time, assume a match and return true.
1708 * This happens when an ovn-controller is restarting and the Chassis
1709 * entry is gone away momentarily */
1710 return !nb_gwc_c
1711 || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
1712 sb_gwc->chassis->name));
b86f4767 1713}
1714
/* Returns true if the SB Port_Binding's Gateway_Chassis list is out of
 * sync with the NB Logical_Router_Port's gateway_chassis list and must be
 * rewritten; false if they already match (or on NULL inputs). */
static bool
sbpb_gw_chassis_needs_update(
    const struct sbrec_port_binding *port_binding,
    const struct nbrec_logical_router_port *lrp,
    const struct chassis_index *chassis_index)
{
    if (!lrp || !port_binding) {
        return false;
    }

    /* These arrays are used to collect valid Gateway_Chassis and valid
     * Chassis records from the Logical_Router_Port Gateway_Chassis list,
     * we ignore the ones we can't match on the SBDB */
    struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis *
                                                     sizeof *lrp_gwc);
    const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis *
                                                     sizeof *lrp_gwc_c);

    /* Count the number of gateway chassis chassis names from the logical
     * router port that we are able to match on the southbound database */
    int lrp_n_gateway_chassis = 0;
    int n;
    for (n = 0; n < lrp->n_gateway_chassis; n++) {

        if (!lrp->gateway_chassis[n]->chassis_name) {
            continue;
        }

        const struct sbrec_chassis *chassis =
            chassis_lookup_by_name(chassis_index,
                                   lrp->gateway_chassis[n]->chassis_name);

        /* Note: 'chassis' may be NULL here; the entry is still collected
         * and counted, and gateway_chassis_equal() treats a NULL chassis
         * as a wildcard match. */
        lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
        lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
        lrp_n_gateway_chassis++;
        if (!chassis) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(
                &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis "
                "on logical router port %s does not exist in SBDB",
                lrp->gateway_chassis[n]->chassis_name, lrp->name);
        }
    }

    /* Basic check, different amount of Gateway_Chassis means that we
     * need to update southbound database Port_Binding */
    if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) {
        free(lrp_gwc_c);
        free(lrp_gwc);
        return true;
    }

    for (n = 0; n < lrp_n_gateway_chassis; n++) {
        int i;
        /* For each of the valid gw chassis on the lrp, check if there's
         * a match on the Port_Binding list, we assume order is not
         * persisted */
        for (i = 0; i < port_binding->n_gateway_chassis; i++) {
            if (gateway_chassis_equal(lrp_gwc[n],
                                      lrp_gwc_c[n],
                                      port_binding->gateway_chassis[i])) {
                break; /* we found a match */
            }
        }

        /* if no Port_Binding gateway chassis matched for the entry... */
        if (i == port_binding->n_gateway_chassis) {
            free(lrp_gwc_c);
            free(lrp_gwc);
            return true; /* found no match for this gateway chassis on lrp */
        }
    }

    /* no need for update, all ports matched */
    free(lrp_gwc_c);
    free(lrp_gwc);
    return false;
}
1793
/* This function translates the gw chassis on the nb database
 * to sb database entries, the only difference is that SB database
 * Gateway_Chassis table references the chassis directly instead
 * of using the name */
static void
copy_gw_chassis_from_nbrp_to_sbpb(
        struct northd_context *ctx,
        const struct nbrec_logical_router_port *lrp,
        const struct chassis_index *chassis_index,
        const struct sbrec_port_binding *port_binding) {

    if (!lrp || !port_binding || !lrp->n_gateway_chassis) {
        return;
    }

    struct sbrec_gateway_chassis **gw_chassis = NULL;
    int n_gwc = 0;
    int n;

    /* XXX: This can be improved. This code will generate a set of new
     * Gateway_Chassis and push them all in a single transaction, instead
     * this would be more optimal if we just add/update/remove the rows in
     * the southbound db that need to change. We don't expect lots of
     * changes to the Gateway_Chassis table, but if that proves to be wrong
     * we should optimize this. */
    for (n = 0; n < lrp->n_gateway_chassis; n++) {
        struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n];
        if (!lrp_gwc->chassis_name) {
            continue;
        }

        /* May resolve to NULL; the SB row is still created with a NULL
         * chassis reference in that case. */
        const struct sbrec_chassis *chassis =
            chassis_lookup_by_name(chassis_index, lrp_gwc->chassis_name);

        gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis);

        struct sbrec_gateway_chassis *pb_gwc =
            sbrec_gateway_chassis_insert(ctx->ovnsb_txn);

        sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name);
        sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority);
        sbrec_gateway_chassis_set_chassis(pb_gwc, chassis);
        sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options);
        sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids);

        gw_chassis[n_gwc++] = pb_gwc;
    }
    /* Replace the Port_Binding's whole gateway_chassis set at once. */
    sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc);
    free(gw_chassis);
}
1844
/* Synchronizes one southbound Port_Binding row ('op->sb') with its
 * northbound counterpart.  'op' is backed either by a northbound
 * Logical_Router_Port ('op->nbrp') or a Logical_Switch_Port ('op->nbsp');
 * exactly one of those branches runs.  'chassis_index' is used to resolve
 * chassis names to southbound Chassis rows, and 'chassis_qdisc_queues'
 * tracks per-chassis QoS queue ID allocation. */
static void
ovn_port_update_sbrec(struct northd_context *ctx,
                      const struct ovn_port *op,
                      const struct chassis_index *chassis_index,
                      struct hmap *chassis_qdisc_queues)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "l3gateway". */
        const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");

        if (op->derived) {
            /* Derived (redirect) port of a distributed router port. */
            sbrec_port_binding_set_type(op->sb, "chassisredirect");
        } else if (chassis_name) {
            sbrec_port_binding_set_type(op->sb, "l3gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        struct smap new;
        smap_init(&new);
        if (op->derived) {
            const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                    "redirect-chassis");
            /* Gateway_Chassis rows take precedence over the legacy
             * redirect-chassis option; warn if both are set. */
            if (op->nbrp->n_gateway_chassis && redirect_chassis) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(
                    &rl, "logical router port %s has both options:"
                    "redirect-chassis and gateway_chassis populated "
                    "redirect-chassis will be ignored in favour of "
                    "gateway chassis", op->nbrp->name);
            }

            if (op->nbrp->n_gateway_chassis) {
                /* Only rewrite the southbound rows when they differ from
                 * the northbound view, to avoid needless DB churn. */
                if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp,
                                                 chassis_index)) {
                    copy_gw_chassis_from_nbrp_to_sbpb(ctx, op->nbrp,
                                                      chassis_index, op->sb);
                }

            } else if (redirect_chassis) {
                /* Handle ports that had redirect-chassis option attached
                 * to them, and for backwards compatibility convert them
                 * to a single Gateway_Chassis entry */
                const struct sbrec_chassis *chassis =
                    chassis_lookup_by_name(chassis_index, redirect_chassis);
                if (chassis) {
                    /* If we found the chassis, and the gw chassis on record
                     * differs from what we expect go ahead and update */
                    if (op->sb->n_gateway_chassis != 1
                        || !op->sb->gateway_chassis[0]->chassis
                        || strcmp(op->sb->gateway_chassis[0]->chassis->name,
                                  chassis->name)
                        || op->sb->gateway_chassis[0]->priority != 0) {
                        /* Construct a single Gateway_Chassis entry on the
                         * Port_Binding attached to the redirect_chassis
                         * name */
                        struct sbrec_gateway_chassis *gw_chassis =
                            sbrec_gateway_chassis_insert(ctx->ovnsb_txn);

                        char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
                                                   chassis->name);

                        /* XXX: Again, here, we could just update an existing
                         * Gateway_Chassis, instead of creating a new one
                         * and replacing it */
                        sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
                        sbrec_gateway_chassis_set_priority(gw_chassis, 0);
                        sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
                        sbrec_gateway_chassis_set_external_ids(gw_chassis,
                                &op->nbrp->external_ids);
                        sbrec_port_binding_set_gateway_chassis(op->sb,
                                                               &gw_chassis, 1);
                        free(gwc_name);
                    }
                } else {
                    VLOG_WARN("chassis name '%s' from redirect from logical "
                              " router port '%s' redirect-chassis not found",
                              redirect_chassis, op->nbrp->name);
                    if (op->sb->n_gateway_chassis) {
                        sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
                                                               0);
                    }
                }
            }
            smap_add(&new, "distributed-port", op->nbrp->name);
        } else {
            if (op->peer) {
                smap_add(&new, "peer", op->peer->key);
            }
            if (chassis_name) {
                smap_add(&new, "l3gateway-chassis", chassis_name);
            }
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Router ports carry no parent/tag/mac data; clear them. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);

        struct smap ids = SMAP_INITIALIZER(&ids);
        sbrec_port_binding_set_external_ids(op->sb, &ids);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary (non-router) switch port: manage the optional QoS
             * queue ID and copy northbound options through. */
            uint32_t queue_id = smap_get_int(
                &op->sb->options, "qdisc_queue_id", 0);
            bool has_qos = port_has_qos_params(&op->nbsp->options);
            struct smap options;

            if (op->sb->chassis && has_qos && !queue_id) {
                queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
                                                    op->sb->chassis);
            } else if (!has_qos && queue_id) {
                free_chassis_queueid(chassis_qdisc_queues,
                                     op->sb->chassis,
                                     queue_id);
                queue_id = 0;
            }

            smap_clone(&options, &op->nbsp->options);
            if (queue_id) {
                smap_add_format(&options,
                                "qdisc_queue_id", "%d", queue_id);
            }
            sbrec_port_binding_set_options(op->sb, &options);
            smap_destroy(&options);
            if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
                sbrec_port_binding_set_type(op->sb, op->nbsp->type);
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(
                    &rl, "Unknown port type '%s' set on logical switch '%s'.",
                    op->nbsp->type, op->nbsp->name);
            }
        } else {
            /* Switch port of type "router": peers with a router port. */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "l3gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "l3gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get(&op->nbsp->options,
                                               "router-port");
            if (router_port || chassis) {
                struct smap new;
                smap_init(&new);
                if (router_port) {
                    smap_add(&new, "peer", router_port);
                }
                if (chassis) {
                    smap_add(&new, "l3gateway-chassis", chassis);
                }
                sbrec_port_binding_set_options(op->sb, &new);
                smap_destroy(&new);
            }

            const char *nat_addresses = smap_get(&op->nbsp->options,
                                                 "nat-addresses");
            if (nat_addresses && !strcmp(nat_addresses, "router")) {
                /* "router" requests that NAT addresses be derived from the
                 * peer router port. */
                if (op->peer && op->peer->od
                    && (chassis || op->peer->od->l3redirect_port)) {
                    size_t n_nats;
                    char **nats = get_nat_addresses(op->peer, &n_nats);
                    if (n_nats) {
                        sbrec_port_binding_set_nat_addresses(op->sb,
                            (const char **) nats, n_nats);
                        for (size_t i = 0; i < n_nats; i++) {
                            free(nats[i]);
                        }
                        free(nats);
                    } else {
                        sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                    }
                } else {
                    sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                }
            /* Only accept manual specification of ethernet address
             * followed by IPv4 addresses on type "l3gateway" ports. */
            } else if (nat_addresses && chassis) {
                struct lport_addresses laddrs;
                /* Parsed only to validate the syntax; the raw string is
                 * what gets written to the southbound row. */
                if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(1, 1);
                    VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
                    sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                } else {
                    sbrec_port_binding_set_nat_addresses(op->sb,
                                                         &nat_addresses, 1);
                    destroy_lport_addresses(&laddrs);
                }
            } else {
                sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
            }
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);

        struct smap ids = SMAP_INITIALIZER(&ids);
        smap_clone(&ids, &op->nbsp->external_ids);
        /* Expose the Neutron port name under the generic "name" key. */
        const char *name = smap_get(&ids, "neutron:port_name");
        if (name && name[0]) {
            smap_add(&ids, "name", name);
        }
        sbrec_port_binding_set_external_ids(op->sb, &ids);
        smap_destroy(&ids);
    }
}
2062
6e31816f
CSV
2063/* Remove mac_binding entries that refer to logical_ports which are
2064 * deleted. */
2065static void
2066cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
2067{
2068 const struct sbrec_mac_binding *b, *n;
2069 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
2070 if (!ovn_port_find(ports, b->logical_port)) {
2071 sbrec_mac_binding_delete(b);
2072 }
2073 }
2074}
2075
0bac7164 2076/* Updates the southbound Port_Binding table so that it contains the logical
80f408f4 2077 * switch ports specified by the northbound database.
0bac7164
BP
2078 *
2079 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2080 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2081 * datapaths. */
5868eb24
BP
2082static void
2083build_ports(struct northd_context *ctx, struct hmap *datapaths,
b86f4767 2084 const struct chassis_index *chassis_index, struct hmap *ports)
5868eb24
BP
2085{
2086 struct ovs_list sb_only, nb_only, both;
a6095f81
BS
2087 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
2088 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
5868eb24 2089
a6095f81
BS
2090 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
2091 &tag_alloc_table, &sb_only, &nb_only, &both);
5868eb24 2092
5868eb24 2093 struct ovn_port *op, *next;
b511690b
GS
2094 /* For logical ports that are in both databases, update the southbound
2095 * record based on northbound data. Also index the in-use tunnel_keys.
2096 * For logical ports that are in NB database, do any tag allocation
2097 * needed. */
5868eb24 2098 LIST_FOR_EACH_SAFE (op, next, list, &both) {
b511690b
GS
2099 if (op->nbsp) {
2100 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
2101 }
b86f4767 2102 ovn_port_update_sbrec(ctx, op, chassis_index, &chassis_qdisc_queues);
5868eb24
BP
2103
2104 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
2105 if (op->sb->tunnel_key > op->od->port_key_hint) {
2106 op->od->port_key_hint = op->sb->tunnel_key;
2107 }
2108 }
2109
2110 /* Add southbound record for each unmatched northbound record. */
2111 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
2112 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
2113 if (!tunnel_key) {
2114 continue;
2115 }
2116
2117 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
b86f4767 2118 ovn_port_update_sbrec(ctx, op, chassis_index, &chassis_qdisc_queues);
5868eb24
BP
2119
2120 sbrec_port_binding_set_logical_port(op->sb, op->key);
2121 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
2122 }
2123
6e31816f
CSV
2124 bool remove_mac_bindings = false;
2125 if (!ovs_list_is_empty(&sb_only)) {
2126 remove_mac_bindings = true;
2127 }
2128
5868eb24
BP
2129 /* Delete southbound records without northbound matches. */
2130 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 2131 ovs_list_remove(&op->list);
5868eb24
BP
2132 sbrec_port_binding_delete(op->sb);
2133 ovn_port_destroy(ports, op);
2134 }
6e31816f
CSV
2135 if (remove_mac_bindings) {
2136 cleanup_mac_bindings(ctx, ports);
2137 }
b511690b
GS
2138
2139 tag_alloc_destroy(&tag_alloc_table);
a6095f81 2140 destroy_chassis_queues(&chassis_qdisc_queues);
5868eb24
BP
2141}
2142\f
/* Southbound Multicast_Group tunnel keys live in a reserved range, separate
 * from the per-port tunnel keys. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with a fixed key in the reserved range. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Well-known group names with keys at the top of the reserved range. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
2156
2157static bool
2158multicast_group_equal(const struct multicast_group *a,
2159 const struct multicast_group *b)
2160{
2161 return !strcmp(a->name, b->name) && a->key == b->key;
2162}
2163
/* Multicast group entry: the set of ports in one datapath that belong to
 * one multicast group.  Built in memory and later written to the southbound
 * Multicast_Group table by ovn_multicast_update_sbrec(). */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    struct ovn_port **ports;    /* Member ports; grows on demand. */
    size_t n_ports, allocated_ports;
};
2173
2174static uint32_t
2175ovn_multicast_hash(const struct ovn_datapath *datapath,
2176 const struct multicast_group *group)
2177{
2178 return hash_pointer(datapath, group->key);
2179}
2180
2181static struct ovn_multicast *
2182ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
2183 const struct multicast_group *group)
2184{
2185 struct ovn_multicast *mc;
2186
2187 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
2188 ovn_multicast_hash(datapath, group), mcgroups) {
2189 if (mc->datapath == datapath
2190 && multicast_group_equal(mc->group, group)) {
2191 return mc;
4edcdcf4
RB
2192 }
2193 }
5868eb24
BP
2194 return NULL;
2195}
2196
2197static void
2198ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
2199 struct ovn_port *port)
2200{
2201 struct ovn_datapath *od = port->od;
2202 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
2203 if (!mc) {
2204 mc = xmalloc(sizeof *mc);
2205 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
2206 mc->datapath = od;
2207 mc->group = group;
2208 mc->n_ports = 0;
2209 mc->allocated_ports = 4;
2210 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
2211 }
2212 if (mc->n_ports >= mc->allocated_ports) {
2213 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
2214 sizeof *mc->ports);
2215 }
2216 mc->ports[mc->n_ports++] = port;
2217}
4edcdcf4 2218
5868eb24
BP
2219static void
2220ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
2221{
2222 if (mc) {
2223 hmap_remove(mcgroups, &mc->hmap_node);
2224 free(mc->ports);
2225 free(mc);
2226 }
2227}
4edcdcf4 2228
5868eb24
BP
2229static void
2230ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
2231 const struct sbrec_multicast_group *sb)
2232{
2233 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
2234 for (size_t i = 0; i < mc->n_ports; i++) {
2235 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
2236 }
2237 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
2238 free(ports);
4edcdcf4 2239}
bd39395f 2240\f
48605550 2241/* Logical flow generation.
bd39395f 2242 *
48605550 2243 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
2244 * function of most of the northbound database.
2245 */
2246
5868eb24
BP
/* A logical flow staged for the southbound Logical_Flow table.  Flows are
 * hashed and compared on (od, stage, priority, match, actions); see
 * ovn_lflow_hash() and ovn_lflow_equal(). */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;    /* Datapath the flow belongs to. */
    enum ovn_stage stage;       /* Pipeline stage of the flow. */
    uint16_t priority;
    char *match;                /* Match expression; freed by
                                 * ovn_lflow_destroy(). */
    char *actions;              /* Action string; freed by
                                 * ovn_lflow_destroy(). */
    char *stage_hint;           /* Optional; may be NULL; freed by
                                 * ovn_lflow_destroy(). */
    const char *where;          /* Source location that added the flow;
                                 * not owned. */
};
2258
2259static size_t
5868eb24 2260ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 2261{
5868eb24 2262 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 2263 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
2264 hash = hash_string(lflow->match, hash);
2265 return hash_string(lflow->actions, hash);
bd39395f
BP
2266}
2267
5868eb24
BP
2268static bool
2269ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
2270{
2271 return (a->od == b->od
880fcd14 2272 && a->stage == b->stage
5868eb24
BP
2273 && a->priority == b->priority
2274 && !strcmp(a->match, b->match)
2275 && !strcmp(a->actions, b->actions));
2276}
2277
/* Initializes 'lflow' with the given field values.  The string pointers are
 * stored as-is (no copy is made): ovn_lflow_add_at() passes heap-allocated
 * strings that ovn_lflow_destroy() later frees, while ovn_lflow_find()
 * passes borrowed strings for a stack-allocated lookup key. */
static void
ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
               enum ovn_stage stage, uint16_t priority,
               char *match, char *actions, char *stage_hint,
               const char *where)
{
    lflow->od = od;
    lflow->stage = stage;
    lflow->priority = priority;
    lflow->match = match;
    lflow->actions = actions;
    lflow->stage_hint = stage_hint;
    lflow->where = where;
}
2292
48605550 2293/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 2294static void
d8026bbf
BP
2295ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2296 enum ovn_stage stage, uint16_t priority,
17bfa2aa
HZ
2297 const char *match, const char *actions,
2298 const char *stage_hint, const char *where)
5868eb24 2299{
9a9961d2
BP
2300 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2301
5868eb24 2302 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 2303 ovn_lflow_init(lflow, od, stage, priority,
17bfa2aa
HZ
2304 xstrdup(match), xstrdup(actions),
2305 nullable_xstrdup(stage_hint), where);
5868eb24
BP
2306 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2307}
2308
/* Adds a row with the specified contents to the Logical_Flow table.
 * The _with_hint variant records an optional stage hint string; both
 * capture the call site via OVS_SOURCE_LOCATOR for debugging. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                            ACTIONS, NULL)
d8026bbf 2318
5868eb24
BP
2319static struct ovn_lflow *
2320ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 2321 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
2322 const char *match, const char *actions)
2323{
2324 struct ovn_lflow target;
880fcd14 2325 ovn_lflow_init(&target, od, stage, priority,
d8026bbf 2326 CONST_CAST(char *, match), CONST_CAST(char *, actions),
17bfa2aa 2327 NULL, NULL);
5868eb24
BP
2328
2329 struct ovn_lflow *lflow;
2330 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
2331 lflows) {
2332 if (ovn_lflow_equal(lflow, &target)) {
2333 return lflow;
bd39395f
BP
2334 }
2335 }
5868eb24
BP
2336 return NULL;
2337}
bd39395f 2338
5868eb24
BP
2339static void
2340ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2341{
2342 if (lflow) {
2343 hmap_remove(lflows, &lflow->hmap_node);
2344 free(lflow->match);
2345 free(lflow->actions);
17bfa2aa 2346 free(lflow->stage_hint);
5868eb24
BP
2347 free(lflow);
2348 }
bd39395f
BP
2349}
2350
bd39395f 2351/* Appends port security constraints on L2 address field 'eth_addr_field'
e93b43d6
JP
2352 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2353 * elements, is the collection of port_security constraints from an
2354 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
bd39395f 2355static void
685f4dfe 2356build_port_security_l2(const char *eth_addr_field,
e93b43d6
JP
2357 struct lport_addresses *ps_addrs,
2358 unsigned int n_ps_addrs,
685f4dfe 2359 struct ds *match)
bd39395f 2360{
e93b43d6
JP
2361 if (!n_ps_addrs) {
2362 return;
2363 }
bd39395f 2364
e93b43d6 2365 ds_put_format(match, " && %s == {", eth_addr_field);
f7cb14cd 2366
e93b43d6
JP
2367 for (size_t i = 0; i < n_ps_addrs; i++) {
2368 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
bd39395f 2369 }
f7cb14cd 2370 ds_chomp(match, ' ');
bd39395f 2371 ds_put_cstr(match, "}");
bd39395f
BP
2372}
2373
685f4dfe
NS
/* Appends an IPv6 ND port-security clause to 'match': the packet must be
 * an ND packet whose source/target link-layer option (nd.sll/nd.tll) is
 * either all-zeros or the port's MAC 'ea', and, when the port has IPv6
 * addresses, whose nd.target is the port's link-local address or one of
 * 'ipv6_addrs'. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No IPv6 addresses configured: close the clause without an
         * nd.target restriction. */
        ds_put_cstr(match, "))");
        return;
    }

    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    /* The EUI-64 link-local address derived from 'ea' is always allowed. */
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
2404
/* Appends an IPv6 address port-security clause to 'match': restricts
 * ip6.src (ingress) or ip6.dst (egress) to the port's link-local address
 * plus 'ipv6_addrs'; on egress the multicast range ff00::/8 is also
 * allowed. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
2434
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A MAC-only entry (no IPs at all) permits both ARP and ND. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Drop the trailing ", " and close the set. */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port that did not match one of
     * the priority-90 allow flows above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
2514
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has IPv4 addresses,
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *
 *   - If the port security has IPv6 addresses,
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both
 *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only entries add no IP constraints at all. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress additionally allows limited broadcast and IPv4
                 * multicast destinations. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address. If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Catch-all: drop any other IP traffic from/to this MAC. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }

}
2641
95a9a275 2642static bool
80f408f4 2643lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 2644{
80f408f4 2645 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
2646}
2647
4c7bf534 2648static bool
80f408f4 2649lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 2650{
80f408f4 2651 return !lsp->up || *lsp->up;
4c7bf534
NS
2652}
2653
281977f7
NS
/* Builds the logical-flow action strings for native DHCPv4 on 'op' with
 * offered address 'offer_ip':
 *   - 'options_action': a put_dhcp_opts(...) action carrying the offer IP
 *     and the configured DHCP options in a deterministic order.
 *   - 'response_action': rewrites the packet into a server->client reply
 *     and sends it back out the ingress port.
 *   - 'ipv4_addr_match': matches renewal traffic from the offered address.
 * Returns false (leaving the ds arguments untouched) if DHCPv4 is disabled
 * for the port, the CIDR is invalid, 'offer_ip' is outside the CIDR, or a
 * required option is missing. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action,
                    struct ds *ipv4_addr_match)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
        /* Either
         *  - cidr defined is invalid or
         *  - the offer ip of the logical port doesn't belong to the cidr
         *    defined in the DHCPv4 options.
         *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");

    if (!(server_ip && server_mac && lease_time)) {
        /* "server_id", "server_mac" and "lease_time" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));

    /* We're not using SMAP_FOR_EACH because we want a consistent order of the
     * options on different architectures (big or little endian, SSE4.2) */
    const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
    for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
        const struct smap_node *node = sorted_opts[i];
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }
    free(sorted_opts);

    /* Drop the trailing ", " and close the action. */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    ds_put_format(ipv4_addr_match,
                  "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
                  IP_ARGS(offer_ip), server_ip);
    smap_destroy(&dhcpv4_options);
    return true;
}
2731
33ac3c83
NS
2732static bool
2733build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2734 struct ds *options_action, struct ds *response_action)
2735{
2736 if (!op->nbsp->dhcpv6_options) {
2737 /* CMS has disabled native DHCPv6 for this lport. */
2738 return false;
2739 }
2740
2741 struct in6_addr host_ip, mask;
2742
2743 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2744 &mask);
2745 if (error) {
2746 free(error);
2747 return false;
2748 }
2749 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2750 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2751 if (!ipv6_mask_is_any(&ip6_mask)) {
2752 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2753 * options.*/
2754 return false;
2755 }
2756
7c76bf4e 2757 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
33ac3c83 2758 /* "server_id" should be the MAC address. */
7c76bf4e 2759 const char *server_mac = smap_get(options_map, "server_id");
33ac3c83
NS
2760 struct eth_addr ea;
2761 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2762 /* "server_id" should be present in the dhcpv6_options. */
2763 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2764 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2765 " for lport %s", op->json_key);
2766 return false;
2767 }
2768
2769 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2770 struct in6_addr lla;
2771 in6_generate_lla(ea, &lla);
2772
2773 char server_ip[INET6_ADDRSTRLEN + 1];
2774 ipv6_string_mapped(server_ip, &lla);
2775
2776 char ia_addr[INET6_ADDRSTRLEN + 1];
2777 ipv6_string_mapped(ia_addr, offer_ip);
2778
2779 ds_put_format(options_action,
40df4566
ZKL
2780 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2781
2782 /* Check whether the dhcpv6 options should be configured as stateful.
2783 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
7c76bf4e 2784 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
40df4566 2785 ipv6_string_mapped(ia_addr, offer_ip);
40df4566
ZKL
2786 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2787 }
2788
7c76bf4e
DDP
2789 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2790 * options on different architectures (big or little endian, SSE4.2) */
2791 const struct smap_node **sorted_opts = smap_sort(options_map);
2792 for (size_t i = 0; i < smap_count(options_map); i++) {
2793 const struct smap_node *node = sorted_opts[i];
40df4566
ZKL
2794 if (strcmp(node->key, "dhcpv6_stateless")) {
2795 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2796 }
33ac3c83 2797 }
7c76bf4e
DDP
2798 free(sorted_opts);
2799
33ac3c83
NS
2800 ds_chomp(options_action, ' ');
2801 ds_chomp(options_action, ',');
2802 ds_put_cstr(options_action, "); next;");
2803
2804 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2805 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2806 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2807 "output;",
2808 server_mac, server_ip);
40df4566 2809
33ac3c83
NS
2810 return true;
2811}
2812
78aab811
JP
2813static bool
2814has_stateful_acl(struct ovn_datapath *od)
2815{
9975d7be
BP
2816 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2817 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
2818 if (!strcmp(acl->action, "allow-related")) {
2819 return true;
2820 }
2821 }
2822
2823 return false;
2824}
2825
2826static void
9ab989b7 2827build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
78aab811
JP
2828{
2829 bool has_stateful = has_stateful_acl(od);
2830
2831 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2832 * allowed by default. */
880fcd14
BP
2833 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
2834 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
78aab811 2835
c132fca0 2836 /* If there are any stateful ACL rules in this datapath, we must
78aab811
JP
2837 * send all IP packets through the conntrack action, which handles
2838 * defragmentation, in order to match L4 headers. */
2839 if (has_stateful) {
9ab989b7
BP
2840 for (size_t i = 0; i < od->n_router_ports; i++) {
2841 struct ovn_port *op = od->router_ports[i];
2842 /* Can't use ct() for router ports. Consider the
2843 * following configuration: lp1(10.0.0.2) on
2844 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2845 * ping from lp1 to lp2, First, the response will go
2846 * through ct() with a zone for lp2 in the ls2 ingress
2847 * pipeline on hostB. That ct zone knows about this
2848 * connection. Next, it goes through ct() with the zone
2849 * for the router port in the egress pipeline of ls2 on
2850 * hostB. This zone does not know about the connection,
2851 * as the icmp request went through the logical router
2852 * on hostA, not hostB. This would only work with
2853 * distributed conntrack state across all chassis. */
2854 struct ds match_in = DS_EMPTY_INITIALIZER;
2855 struct ds match_out = DS_EMPTY_INITIALIZER;
2856
2857 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
2858 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
2859 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2860 ds_cstr(&match_in), "next;");
2861 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2862 ds_cstr(&match_out), "next;");
2863
2864 ds_destroy(&match_in);
2865 ds_destroy(&match_out);
48fcdb47 2866 }
5b29422c 2867 if (od->localnet_port) {
2868 struct ds match_in = DS_EMPTY_INITIALIZER;
2869 struct ds match_out = DS_EMPTY_INITIALIZER;
2870
2871 ds_put_format(&match_in, "ip && inport == %s",
2872 od->localnet_port->json_key);
2873 ds_put_format(&match_out, "ip && outport == %s",
2874 od->localnet_port->json_key);
2875 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2876 ds_cstr(&match_in), "next;");
2877 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2878 ds_cstr(&match_out), "next;");
2879
2880 ds_destroy(&match_in);
2881 ds_destroy(&match_out);
2882 }
2883
2d018f9b
GS
2884 /* Ingress and Egress Pre-ACL Table (Priority 110).
2885 *
2886 * Not to do conntrack on ND packets. */
2887 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
4364646c
ZKL
2888 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "(nd_rs || nd_ra)",
2889 "next;");
2d018f9b 2890 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
4364646c
ZKL
2891 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2892 "(nd_rs || nd_ra)", "next;");
48fcdb47 2893
78aab811
JP
2894 /* Ingress and Egress Pre-ACL Table (Priority 100).
2895 *
2896 * Regardless of whether the ACL is "from-lport" or "to-lport",
2897 * we need rules in both the ingress and egress table, because
facf8652
GS
2898 * the return traffic needs to be followed.
2899 *
2900 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2901 * it to conntrack for tracking and defragmentation. */
2902 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
2903 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2904 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
2905 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2d018f9b
GS
2906 }
2907}
78aab811 2908
7a15be69
GS
2909/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2910 * 'ip_address'. The caller must free() the memory allocated for
2911 * 'ip_address'. */
2912static void
2913ip_address_and_port_from_lb_key(const char *key, char **ip_address,
485d373b 2914 uint16_t *port, int *addr_family)
7a15be69 2915{
485d373b
MM
2916 struct sockaddr_storage ss;
2917 char ip_addr_buf[INET6_ADDRSTRLEN];
2918 char *error;
7a15be69 2919
485d373b
MM
2920 error = ipv46_parse(key, PORT_OPTIONAL, &ss);
2921 if (error) {
7a15be69 2922 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
485d373b
MM
2923 VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s",
2924 key);
7a15be69
GS
2925 free(error);
2926 return;
2927 }
2928
485d373b
MM
2929 if (ss.ss_family == AF_INET) {
2930 struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *, &ss);
2931 *port = sin->sin_port == 0 ? 0 : ntohs(sin->sin_port);
2932 inet_ntop(AF_INET, &sin->sin_addr, ip_addr_buf, sizeof ip_addr_buf);
2933 } else {
2934 struct sockaddr_in6 *sin6 = ALIGNED_CAST(struct sockaddr_in6 *, &ss);
2935 *port = sin6->sin6_port == 0 ? 0 : ntohs(sin6->sin6_port);
2936 inet_ntop(AF_INET6, &sin6->sin6_addr, ip_addr_buf, sizeof ip_addr_buf);
7a15be69
GS
2937 }
2938
485d373b
MM
2939 *ip_address = xstrdup(ip_addr_buf);
2940 *addr_family = ss.ss_family;
7a15be69
GS
2941}
2942
302eda27
NS
2943/*
2944 * Returns true if logical switch is configured with DNS records, false
2945 * otherwise.
2946 */
2947static bool
2948ls_has_dns_records(const struct nbrec_logical_switch *nbs)
2949{
2950 for (size_t i = 0; i < nbs->n_dns_records; i++) {
2951 if (!smap_is_empty(&nbs->dns_records[i]->records)) {
2952 return true;
2953 }
2954 }
2955
2956 return false;
2957}
2958
7a15be69
GS
2959static void
2960build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2961{
2962 /* Allow all packets to go to next tables by default. */
2963 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2964 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2965
2966 struct sset all_ips = SSET_INITIALIZER(&all_ips);
61591ad9 2967 bool vip_configured = false;
485d373b 2968 int addr_family = AF_INET;
61591ad9
GS
2969 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2970 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
2971 struct smap *vips = &lb->vips;
2972 struct smap_node *node;
7a15be69
GS
2973
2974 SMAP_FOR_EACH (node, vips) {
2975 vip_configured = true;
2976
2977 /* node->key contains IP:port or just IP. */
2978 char *ip_address = NULL;
2979 uint16_t port;
485d373b
MM
2980 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
2981 &addr_family);
7a15be69
GS
2982 if (!ip_address) {
2983 continue;
2984 }
2985
2986 if (!sset_contains(&all_ips, ip_address)) {
2987 sset_add(&all_ips, ip_address);
2988 }
2989
2990 free(ip_address);
2991
2992 /* Ignore L4 port information in the key because fragmented packets
2993 * may not have L4 information. The pre-stateful table will send
2994 * the packet through ct() action to de-fragment. In stateful
2995 * table, we will eventually look at L4 information. */
2996 }
61591ad9 2997 }
7a15be69 2998
61591ad9
GS
2999 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3000 * packet to conntrack for defragmentation. */
3001 const char *ip_address;
3002 SSET_FOR_EACH(ip_address, &all_ips) {
485d373b
MM
3003 char *match;
3004
3005 if (addr_family == AF_INET) {
3006 match = xasprintf("ip && ip4.dst == %s", ip_address);
3007 } else {
3008 match = xasprintf("ip && ip6.dst == %s", ip_address);
3009 }
61591ad9
GS
3010 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
3011 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
3012 free(match);
3013 }
7a15be69 3014
61591ad9 3015 sset_destroy(&all_ips);
7a15be69 3016
61591ad9
GS
3017 if (vip_configured) {
3018 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
3019 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
7a15be69
GS
3020 }
3021}
3022
facf8652
GS
3023static void
3024build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
3025{
3026 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3027 * allowed by default. */
3028 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
3029 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
3030
3031 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3032 * sent to conntrack for tracking and defragmentation. */
3033 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
3034 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3035 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
3036 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
3037}
3038
d383eed5
JP
3039static void
3040build_acl_log(struct ds *actions, const struct nbrec_acl *acl)
3041{
3042 if (!acl->log) {
3043 return;
3044 }
3045
3046 ds_put_cstr(actions, "log(");
3047
3048 if (acl->name) {
3049 ds_put_format(actions, "name=\"%s\", ", acl->name);
3050 }
3051
3052 /* If a severity level isn't specified, default to "info". */
3053 if (acl->severity) {
3054 ds_put_format(actions, "severity=%s, ", acl->severity);
3055 } else {
3056 ds_put_format(actions, "severity=info, ");
3057 }
3058
3059 if (!strcmp(acl->action, "drop")) {
3060 ds_put_cstr(actions, "verdict=drop, ");
3061 } else if (!strcmp(acl->action, "reject")) {
3062 ds_put_cstr(actions, "verdict=reject, ");
3063 } else if (!strcmp(acl->action, "allow")
3064 || !strcmp(acl->action, "allow-related")) {
3065 ds_put_cstr(actions, "verdict=allow, ");
3066 }
3067
3068 ds_chomp(actions, ' ');
3069 ds_chomp(actions, ',');
3070 ds_put_cstr(actions, "); ");
3071}
3072
2d018f9b
GS
3073static void
3074build_acls(struct ovn_datapath *od, struct hmap *lflows)
3075{
3076 bool has_stateful = has_stateful_acl(od);
e75451fe 3077
2d018f9b
GS
3078 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
3079 * default. A related rule at priority 1 is added below if there
3080 * are any stateful ACLs in this datapath. */
3081 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
3082 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
3083
3084 if (has_stateful) {
78aab811
JP
3085 /* Ingress and Egress ACL Table (Priority 1).
3086 *
3087 * By default, traffic is allowed. This is partially handled by
3088 * the Priority 0 ACL flows added earlier, but we also need to
3089 * commit IP flows. This is because, while the initiater's
3090 * direction may not have any stateful rules, the server's may
3091 * and then its return traffic would not have an associated
cc58e1f2
RB
3092 * conntrack entry and would return "+invalid".
3093 *
3094 * We use "ct_commit" for a connection that is not already known
3095 * by the connection tracker. Once a connection is committed,
3096 * subsequent packets will hit the flow at priority 0 that just
3097 * uses "next;"
3098 *
b73db61d 3099 * We also check for established connections that have ct_label.blocked
cc58e1f2
RB
3100 * set on them. That's a connection that was disallowed, but is
3101 * now allowed by policy again since it hit this default-allow flow.
b73db61d 3102 * We need to set ct_label.blocked=0 to let the connection continue,
cc58e1f2
RB
3103 * which will be done by ct_commit() in the "stateful" stage.
3104 * Subsequent packets will hit the flow at priority 0 that just
3105 * uses "next;". */
3106 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
b73db61d 3107 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
cc58e1f2
RB
3108 REGBIT_CONNTRACK_COMMIT" = 1; next;");
3109 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
b73db61d 3110 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
cc58e1f2 3111 REGBIT_CONNTRACK_COMMIT" = 1; next;");
78aab811
JP
3112
3113 /* Ingress and Egress ACL Table (Priority 65535).
3114 *
cc58e1f2
RB
3115 * Always drop traffic that's in an invalid state. Also drop
3116 * reply direction packets for connections that have been marked
3117 * for deletion (bit 0 of ct_label is set).
3118 *
3119 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 3120 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
b73db61d 3121 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
cc58e1f2 3122 "drop;");
880fcd14 3123 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
b73db61d 3124 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
cc58e1f2 3125 "drop;");
78aab811
JP
3126
3127 /* Ingress and Egress ACL Table (Priority 65535).
3128 *
cc58e1f2
RB
3129 * Allow reply traffic that is part of an established
3130 * conntrack entry that has not been marked for deletion
3131 * (bit 0 of ct_label). We only match traffic in the
3132 * reply direction because we want traffic in the request
3133 * direction to hit the currently defined policy from ACLs.
3134 *
3135 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 3136 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2 3137 "ct.est && !ct.rel && !ct.new && !ct.inv "
b73db61d 3138 "&& ct.rpl && ct_label.blocked == 0",
78aab811 3139 "next;");
880fcd14 3140 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2 3141 "ct.est && !ct.rel && !ct.new && !ct.inv "
b73db61d 3142 "&& ct.rpl && ct_label.blocked == 0",
78aab811
JP
3143 "next;");
3144
3145 /* Ingress and Egress ACL Table (Priority 65535).
3146 *
cc58e1f2
RB
3147 * Allow traffic that is related to an existing conntrack entry that
3148 * has not been marked for deletion (bit 0 of ct_label).
3149 *
3150 * This is enforced at a higher priority than ACLs can be defined.
78aab811
JP
3151 *
3152 * NOTE: This does not support related data sessions (eg,
3153 * a dynamically negotiated FTP data channel), but will allow
3154 * related traffic such as an ICMP Port Unreachable through
3155 * that's generated from a non-listening UDP port. */
880fcd14 3156 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2 3157 "!ct.est && ct.rel && !ct.new && !ct.inv "
b73db61d 3158 "&& ct_label.blocked == 0",
78aab811 3159 "next;");
880fcd14 3160 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2 3161 "!ct.est && ct.rel && !ct.new && !ct.inv "
b73db61d 3162 "&& ct_label.blocked == 0",
78aab811 3163 "next;");
e75451fe
ZKL
3164
3165 /* Ingress and Egress ACL Table (Priority 65535).
3166 *
3167 * Not to do conntrack on ND packets. */
3168 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
3169 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
78aab811
JP
3170 }
3171
3172 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
3173 for (size_t i = 0; i < od->nbs->n_acls; i++) {
3174 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 3175 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 3176 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811 3177
17bfa2aa 3178 char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
cc58e1f2
RB
3179 if (!strcmp(acl->action, "allow")
3180 || !strcmp(acl->action, "allow-related")) {
78aab811
JP
3181 /* If there are any stateful flows, we must even commit "allow"
3182 * actions. This is because, while the initiater's
3183 * direction may not have any stateful rules, the server's
3184 * may and then its return traffic would not have an
3185 * associated conntrack entry and would return "+invalid". */
cc58e1f2 3186 if (!has_stateful) {
d383eed5
JP
3187 struct ds actions = DS_EMPTY_INITIALIZER;
3188 build_acl_log(&actions, acl);
3189 ds_put_cstr(&actions, "next;");
17bfa2aa
HZ
3190 ovn_lflow_add_with_hint(lflows, od, stage,
3191 acl->priority + OVN_ACL_PRI_OFFSET,
d383eed5
JP
3192 acl->match, ds_cstr(&actions),
3193 stage_hint);
3194 ds_destroy(&actions);
cc58e1f2
RB
3195 } else {
3196 struct ds match = DS_EMPTY_INITIALIZER;
d383eed5 3197 struct ds actions = DS_EMPTY_INITIALIZER;
cc58e1f2
RB
3198
3199 /* Commit the connection tracking entry if it's a new
3200 * connection that matches this ACL. After this commit,
3201 * the reply traffic is allowed by a flow we create at
3202 * priority 65535, defined earlier.
3203 *
3204 * It's also possible that a known connection was marked for
3205 * deletion after a policy was deleted, but the policy was
3206 * re-added while that connection is still known. We catch
b73db61d 3207 * that case here and un-set ct_label.blocked (which will be done
cc58e1f2
RB
3208 * by ct_commit in the "stateful" stage) to indicate that the
3209 * connection should be allowed to resume.
3210 */
3211 ds_put_format(&match, "((ct.new && !ct.est)"
3212 " || (!ct.new && ct.est && !ct.rpl "
b73db61d 3213 "&& ct_label.blocked == 1)) "
cc58e1f2 3214 "&& (%s)", acl->match);
d383eed5
JP
3215 ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
3216 build_acl_log(&actions, acl);
3217 ds_put_cstr(&actions, "next;");
17bfa2aa
HZ
3218 ovn_lflow_add_with_hint(lflows, od, stage,
3219 acl->priority + OVN_ACL_PRI_OFFSET,
3220 ds_cstr(&match),
d383eed5 3221 ds_cstr(&actions),
17bfa2aa 3222 stage_hint);
cc58e1f2
RB
3223
3224 /* Match on traffic in the request direction for an established
3225 * connection tracking entry that has not been marked for
3226 * deletion. There is no need to commit here, so we can just
3227 * proceed to the next table. We use this to ensure that this
3228 * connection is still allowed by the currently defined
3229 * policy. */
3230 ds_clear(&match);
d383eed5 3231 ds_clear(&actions);
cc58e1f2
RB
3232 ds_put_format(&match,
3233 "!ct.new && ct.est && !ct.rpl"
b73db61d 3234 " && ct_label.blocked == 0 && (%s)",
cc58e1f2 3235 acl->match);
d383eed5
JP
3236
3237 build_acl_log(&actions, acl);
3238 ds_put_cstr(&actions, "next;");
17bfa2aa
HZ
3239 ovn_lflow_add_with_hint(lflows, od, stage,
3240 acl->priority + OVN_ACL_PRI_OFFSET,
d383eed5 3241 ds_cstr(&match), ds_cstr(&actions),
17bfa2aa 3242 stage_hint);
cc58e1f2
RB
3243
3244 ds_destroy(&match);
d383eed5 3245 ds_destroy(&actions);
cc58e1f2
RB
3246 }
3247 } else if (!strcmp(acl->action, "drop")
3248 || !strcmp(acl->action, "reject")) {
78aab811 3249 struct ds match = DS_EMPTY_INITIALIZER;
d383eed5 3250 struct ds actions = DS_EMPTY_INITIALIZER;
78aab811 3251
cc58e1f2
RB
3252 /* XXX Need to support "reject", treat it as "drop;" for now. */
3253 if (!strcmp(acl->action, "reject")) {
3254 VLOG_INFO("reject is not a supported action");
3255 }
78aab811 3256
cc58e1f2
RB
3257 /* The implementation of "drop" differs if stateful ACLs are in
3258 * use for this datapath. In that case, the actions differ
3259 * depending on whether the connection was previously committed
3260 * to the connection tracker with ct_commit. */
3261 if (has_stateful) {
3262 /* If the packet is not part of an established connection, then
3263 * we can simply drop it. */
3264 ds_put_format(&match,
b73db61d 3265 "(!ct.est || (ct.est && ct_label.blocked == 1)) "
cc58e1f2
RB
3266 "&& (%s)",
3267 acl->match);
d383eed5
JP
3268 ds_clear(&actions);
3269 build_acl_log(&actions, acl);
3270 ds_put_cstr(&actions, "/* drop */");
17bfa2aa
HZ
3271 ovn_lflow_add_with_hint(lflows, od, stage,
3272 acl->priority + OVN_ACL_PRI_OFFSET,
d383eed5 3273 ds_cstr(&match), ds_cstr(&actions),
17bfa2aa 3274 stage_hint);
cc58e1f2
RB
3275
3276 /* For an existing connection without ct_label set, we've
3277 * encountered a policy change. ACLs previously allowed
3278 * this connection and we committed the connection tracking
3279 * entry. Current policy says that we should drop this
3280 * connection. First, we set bit 0 of ct_label to indicate
3281 * that this connection is set for deletion. By not
3282 * specifying "next;", we implicitly drop the packet after
3283 * updating conntrack state. We would normally defer
3284 * ct_commit() to the "stateful" stage, but since we're
3285 * dropping the packet, we go ahead and do it here. */
3286 ds_clear(&match);
d383eed5 3287 ds_clear(&actions);
cc58e1f2 3288 ds_put_format(&match,
b73db61d 3289 "ct.est && ct_label.blocked == 0 && (%s)",
cc58e1f2 3290 acl->match);
d383eed5
JP
3291 ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
3292 build_acl_log(&actions, acl);
3293 ds_put_cstr(&actions, "/* drop */");
17bfa2aa
HZ
3294 ovn_lflow_add_with_hint(lflows, od, stage,
3295 acl->priority + OVN_ACL_PRI_OFFSET,
d383eed5 3296 ds_cstr(&match), ds_cstr(&actions),
17bfa2aa 3297 stage_hint);
cc58e1f2 3298
cc58e1f2
RB
3299 } else {
3300 /* There are no stateful ACLs in use on this datapath,
3301 * so a "drop" ACL is simply the "drop" logical flow action
3302 * in all cases. */
d383eed5
JP
3303 ds_clear(&actions);
3304 build_acl_log(&actions, acl);
3305 ds_put_cstr(&actions, "/* drop */");
17bfa2aa
HZ
3306 ovn_lflow_add_with_hint(lflows, od, stage,
3307 acl->priority + OVN_ACL_PRI_OFFSET,
d383eed5
JP
3308 acl->match, ds_cstr(&actions),
3309 stage_hint);
cc58e1f2 3310 }
d383eed5
JP
3311 ds_destroy(&match);
3312 ds_destroy(&actions);
78aab811 3313 }
17bfa2aa 3314 free(stage_hint);
78aab811 3315 }
281977f7
NS
3316
3317 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
302eda27
NS
3318 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3319 * */
052fa3ac
BP
3320 for (size_t i = 0; i < od->nbs->n_ports; i++) {
3321 if (od->nbs->ports[i]->dhcpv4_options) {
3322 const char *server_id = smap_get(
3323 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
3324 const char *server_mac = smap_get(
3325 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
3326 const char *lease_time = smap_get(
3327 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
b89d25e5 3328 if (server_id && server_mac && lease_time) {
052fa3ac
BP
3329 struct ds match = DS_EMPTY_INITIALIZER;
3330 const char *actions =
3331 has_stateful ? "ct_commit; next;" : "next;";
3332 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3333 "&& ip4.src == %s && udp && udp.src == 67 "
3334 "&& udp.dst == 68", od->nbs->ports[i]->name,
3335 server_mac, server_id);
3336 ovn_lflow_add(
3337 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3338 actions);
75e82c17 3339 ds_destroy(&match);
281977f7 3340 }
052fa3ac 3341 }
33ac3c83 3342
052fa3ac
BP
3343 if (od->nbs->ports[i]->dhcpv6_options) {
3344 const char *server_mac = smap_get(
3345 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
3346 struct eth_addr ea;
3347 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
3348 /* Get the link local IP of the DHCPv6 server from the
3349 * server MAC. */
3350 struct in6_addr lla;
3351 in6_generate_lla(ea, &lla);
3352
3353 char server_ip[INET6_ADDRSTRLEN + 1];
3354 ipv6_string_mapped(server_ip, &lla);
3355
3356 struct ds match = DS_EMPTY_INITIALIZER;
3357 const char *actions = has_stateful ? "ct_commit; next;" :
3358 "next;";
3359 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
3360 "&& ip6.src == %s && udp && udp.src == 547 "
3361 "&& udp.dst == 546", od->nbs->ports[i]->name,
3362 server_mac, server_ip);
3363 ovn_lflow_add(
3364 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
3365 actions);
75e82c17 3366 ds_destroy(&match);
33ac3c83 3367 }
281977f7
NS
3368 }
3369 }
302eda27
NS
3370
3371 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3372 * if the CMS has configured DNS records for the datapath.
3373 */
3374 if (ls_has_dns_records(od->nbs)) {
3375 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
3376 ovn_lflow_add(
3377 lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
3378 actions);
3379 }
78aab811
JP
3380}
3381
1a03fc7d
BS
3382static void
3383build_qos(struct ovn_datapath *od, struct hmap *lflows) {
3384 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
3385 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
3386
3387 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
3388 struct nbrec_qos *qos = od->nbs->qos_rules[i];
3389 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
3390 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
3391
3392 if (!strcmp(qos->key_action, "dscp")) {
3393 struct ds dscp_action = DS_EMPTY_INITIALIZER;
3394
3395 ds_put_format(&dscp_action, "ip.dscp = %d; next;",
3396 (uint8_t)qos->value_action);
3397 ovn_lflow_add(lflows, od, stage,
3398 qos->priority,
3399 qos->match, ds_cstr(&dscp_action));
3400 ds_destroy(&dscp_action);
3401 }
3402 }
3403}
3404
7a15be69
GS
3405static void
3406build_lb(struct ovn_datapath *od, struct hmap *lflows)
3407{
3408 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3409 * default. */
3410 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
3411 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
3412
3413 if (od->nbs->load_balancer) {
3414 /* Ingress and Egress LB Table (Priority 65535).
3415 *
3416 * Send established traffic through conntrack for just NAT. */
3417 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3418 "ct.est && !ct.rel && !ct.new && !ct.inv",
3419 REGBIT_CONNTRACK_NAT" = 1; next;");
3420 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3421 "ct.est && !ct.rel && !ct.new && !ct.inv",
3422 REGBIT_CONNTRACK_NAT" = 1; next;");
3423 }
3424}
3425
fa313a8c
GS
3426static void
3427build_stateful(struct ovn_datapath *od, struct hmap *lflows)
3428{
3429 /* Ingress and Egress stateful Table (Priority 0): Packets are
3430 * allowed by default. */
3431 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
3432 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
3433
3434 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
b73db61d 3435 * committed to conntrack. We always set ct_label.blocked to 0 here as
cc58e1f2
RB
3436 * any packet that makes it this far is part of a connection we
3437 * want to allow to continue. */
fa313a8c 3438 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
cc58e1f2 3439 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
fa313a8c 3440 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
cc58e1f2 3441 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
7a15be69
GS
3442
3443 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3444 * through nat (without committing).
3445 *
3446 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3447 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3448 * don't overlap.
3449 */
3450 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
3451 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3452 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
3453 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
3454
3455 /* Load balancing rules for new connections get committed to conntrack
3456 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3457 * a higher priority rule for load balancing below also commits the
3458 * connection, so it is okay if we do not hit the above match on
3459 * REGBIT_CONNTRACK_COMMIT. */
61591ad9
GS
3460 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
3461 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
3462 struct smap *vips = &lb->vips;
3463 struct smap_node *node;
3464
3465 SMAP_FOR_EACH (node, vips) {
3466 uint16_t port = 0;
485d373b 3467 int addr_family;
7a15be69
GS
3468
3469 /* node->key contains IP:port or just IP. */
3470 char *ip_address = NULL;
485d373b
MM
3471 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
3472 &addr_family);
7a15be69
GS
3473 if (!ip_address) {
3474 continue;
3475 }
3476
3477 /* New connections in Ingress table. */
3478 char *action = xasprintf("ct_lb(%s);", node->value);
3479 struct ds match = DS_EMPTY_INITIALIZER;
485d373b
MM
3480 if (addr_family == AF_INET) {
3481 ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
3482 } else {
3483 ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
3484 }
7a15be69
GS
3485 if (port) {
3486 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
9784ffaf 3487 ds_put_format(&match, " && udp.dst == %d", port);
7a15be69 3488 } else {
9784ffaf 3489 ds_put_format(&match, " && tcp.dst == %d", port);
7a15be69
GS
3490 }
3491 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3492 120, ds_cstr(&match), action);
3493 } else {
3494 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
3495 110, ds_cstr(&match), action);
3496 }
3497
7443e4ec 3498 free(ip_address);
7a15be69
GS
3499 ds_destroy(&match);
3500 free(action);
3501 }
3502 }
fa313a8c
GS
3503}
3504
/* Builds the logical flow table for every logical switch datapath.
 *
 * Iterates over 'datapaths' (only entries with a northbound Logical_Switch
 * row, i.e. od->nbs != NULL) and 'ports' (only logical switch ports,
 * op->nbsp != NULL), appending logical flows to 'lflows' and multicast
 * groups to 'mcgroups'.
 *
 * This flow table structure is documented in ovn-northd(8), so please
 * update ovn-northd.8.xml if you change anything. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    /* Scratch buffers reused (via ds_clear()) across all the loops below to
     * avoid per-flow allocation. */
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 through 9.  Egress tables 0 through 6. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        build_pre_acls(od, lflows);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     *  (priority 50).
     *  Ingress table 1: Ingress port security - IP (priority 90 and 80)
     *  Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!lsp_is_enabled(op->nbsp)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        ds_clear(&match);
        ds_clear(&actions);
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        /* Honor a CMS-requested QoS queue for traffic from this port. */
        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
     * and vtep ports. (priority 100); see ovn-northd.8.xml for the
     * rationale. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if ((!strcmp(op->nbsp->type, "localnet")) ||
            (!strcmp(op->nbsp->type, "vtep"))) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    /* Ingress table 10: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router or
         *  - port type is localport
         */
        if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") &&
            strcmp(op->nbsp->type, "localport")) {
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Respond to ARP requests for each of the port's IPv4
             * addresses with the port's own MAC. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to an ARP request from the port that owns the
                 * address (otherwise a DHCP client that ARPs to check for a
                 * duplicate address will fail).  Instead, forward it the usual
                 * way.
                 *
                 * (Another alternative would be to simply drop the packet.  If
                 * everything is working as it is configured, then this would
                 * produce equivalent results, since no one should reply to the
                 * request.  But ARPing for one's own IP address is intended to
                 * detect situations where the network is not working as
                 * configured, so dropping the request would frustrate that
                 * intent.) */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }

            /* For ND solicitations, we need to listen for both the
             * unicast IPv6 address and its all-nodes multicast address,
             * but always respond with the unicast IPv6 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match,
                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);

                ds_clear(&actions);
                ds_put_format(&actions,
                        "nd_na { "
                        "eth.src = %s; "
                        "ip6.src = %s; "
                        "nd.target = %s; "
                        "nd.tll = %s; "
                        "outport = inport; "
                        "flags.loopback = 1; "
                        "output; "
                        "};",
                        op->lsp_addrs[i].ea_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ea_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to a solicitation from the port that owns the
                 * address (otherwise DAD detection will fail). */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    /* Ingress table 10: ARP/ND responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Logical switch ingress table 11 and 12: DHCP options and response
     * priority 100 flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
           continue;
        }

        if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
            /* Don't add the DHCP flows if the port is not enabled or if the
             * port is a router port. */
            continue;
        }

        if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
            /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
             */
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Only the first address for which build_dhcpv4_action()
             * succeeds gets DHCPv4 flows (note the 'break' below). */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action, &ipv4_addr_match)) {
                    ds_clear(&match);
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);
                    /* Allow ip4.src = OFFER_IP and
                     * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
                     * cases
                     *  - When the client wants to renew the IP by sending
                     *    the DHCPREQUEST to the server ip.
                     *  - When the client wants to renew the IP by
                     *    broadcasting the DHCPREQUEST.
                     */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "%s && udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);

                    /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
                     * put_dhcp_opts action is successful. */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4 && udp.src == 68 && udp.dst == 67"
                        " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
                        op->lsp_addrs[i].ea_s);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    ds_destroy(&ipv4_addr_match);
                    break;
                }
            }

            /* Same pattern for DHCPv6: first usable IPv6 address wins. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv6_action(
                        op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
                        &options_action, &response_action)) {
                    ds_clear(&match);
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s"
                        " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
                        " udp.dst == 547", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
                                  ds_cstr(&match), ds_cstr(&options_action));

                    /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
                     * put_dhcpv6_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
                                  ds_cstr(&match), ds_cstr(&response_action));
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }
        }
    }

    /* Logical switch ingress table 13 and 14: DNS lookup and response
     * priority 100 flows.
     */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs || !ls_has_dns_records(od->nbs)) {
           continue;
        }

        struct ds action = DS_EMPTY_INITIALIZER;

        ds_clear(&match);
        ds_put_cstr(&match, "udp.dst == 53");
        ds_put_format(&action,
                      REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_clear(&action);
        /* If the lookup succeeded, turn the packet around: swap MACs and
         * IPs, answer from UDP port 53, and hairpin it back out the inport.
         * One flow each for IPv4 and IPv6 queries. */
        ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT);
        ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
                      "udp.dst = udp.src; udp.src = 53; outport = inport; "
                      "flags.loopback = 1; output;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_clear(&action);
        ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
                      "udp.dst = udp.src; udp.src = 53; outport = inport; "
                      "flags.loopback = 1; output;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100,
                      ds_cstr(&match), ds_cstr(&action));
        ds_destroy(&action);
    }

    /* Ingress table 11 and 12: DHCP options and response, by default goto
     * next. (priority 0).
     * Ingress table 13 and 14: DNS lookup and response, by default goto next.
     * (priority 0).*/

    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;");
    }

    /* Ingress table 15: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 15: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            /* Addresses are owned by the logical port.
             * Ethernet address followed by zero or more IPv4
             * or IPv6 addresses (or both). */
            struct eth_addr mac;
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                /* "dynamic" address: use the MAC that ovn-northd itself
                 * allocated, if one has been recorded yet. */
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "router")) {
                /* "router": forward to the peer logical router port's MAC. */
                if (!op->peer || !op->peer->nbrp
                    || !ovs_scan(op->peer->nbrp->mac,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port
                    && op->peer->od->l3redirect_port) {
                    /* The destination lookup flow for the router's
                     * distributed gateway port MAC address should only be
                     * programmed on the "redirect-chassis". */
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  op->peer->od->l3redirect_port->json_key);
                }

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Add ethernet addresses specified in NAT rules on
                 * distributed logical routers. */
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port) {
                    for (int j = 0; j < op->peer->od->nbr->n_nat; j++) {
                        const struct nbrec_nat *nat
                                                  = op->peer->od->nbr->nat[j];
                        if (!strcmp(nat->type, "dnat_and_snat")
                            && nat->logical_port && nat->external_mac
                            && eth_addr_from_string(nat->external_mac, &mac)) {

                            ds_clear(&match);
                            ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
                                          " && is_chassis_resident(\"%s\")",
                                          ETH_ADDR_ARGS(mac),
                                          nat->logical_port);

                            ds_clear(&actions);
                            ds_put_format(&actions, "outport = %s; output;",
                                          op->json_key);
                            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
                                          50, ds_cstr(&match),
                                          ds_cstr(&actions));
                        }
                    }
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    /* Ingress table 15: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress table 7: Egress port security - IP (priority 0)
     * Egress table 8: Egress port security L2 - multicast/broadcast
     * (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 7: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 8: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }

    ds_destroy(&match);
    ds_destroy(&actions);
}
eb00399e 4050
9975d7be
BP
4051static bool
4052lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
4053{
4054 return !lrport->enabled || *lrport->enabled;
4055}
4056
/* Returns a string of the IP address of the router port 'op' that
 * overlaps with 'ip_s'.  If one is not found, returns NULL.
 *
 * Address-family detection is heuristic: the presence of a '.' in 'ip_s'
 * selects IPv4, otherwise IPv6 is assumed.
 *
 * The caller must not free the returned string (it points into
 * op->lrp_networks). */
static const char *
find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
{
    bool is_ipv4 = strchr(ip_s, '.') ? true : false;

    if (is_ipv4) {
        ovs_be32 ip;

        if (!ip_parse(ip_s, &ip)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
            return NULL;
        }

        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];

            /* Zero means 'ip' lies inside this interface's subnet. */
            if (!((na->network ^ ip) & na->mask)) {
                /* There should be only 1 interface that matches the
                 * supplied IP.  Otherwise, it's a configuration error,
                 * because subnets of a router's interfaces should NOT
                 * overlap. */
                return na->addr_s;
            }
        }
    } else {
        struct in6_addr ip6;

        if (!ipv6_parse(ip_s, &ip6)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
            return NULL;
        }

        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
            const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
            /* (network XOR ip) AND mask == 0 iff 'ip6' is inside the
             * interface's IPv6 subnet. */
            struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
            struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);

            if (ipv6_is_zero(&and_addr)) {
                /* There should be only 1 interface that matches the
                 * supplied IP.  Otherwise, it's a configuration error,
                 * because subnets of a router's interfaces should NOT
                 * overlap. */
                return na->addr_s;
            }
        }
    }

    return NULL;
}
4112
/* Adds a logical router IP routing flow (stage S_ROUTER_IN_IP_ROUTING) for
 * the route 'network_s'/'plen' out of router port 'op'.
 *
 * 'lrp_addr_s' is the router port source IP to load into reg1/xxreg1;
 * 'gateway', if nonnull, is the next-hop IP loaded into reg0/xxreg0
 * (otherwise the packet's own destination IP is used, i.e. a directly
 * attached network).  'policy' selects "src-ip" or destination-based
 * matching; the flow priority is derived from 'plen' so that
 * longest-prefix-match order is preserved, with dst-ip routes winning over
 * src-ip routes of the same length (2*plen+1 vs 2*plen). */
static void
add_route(struct hmap *lflows, const struct ovn_port *op,
          const char *lrp_addr_s, const char *network_s, int plen,
          const char *gateway, const char *policy)
{
    /* Heuristic family detection: '.' in the prefix string means IPv4. */
    bool is_ipv4 = strchr(network_s, '.') ? true : false;
    struct ds match = DS_EMPTY_INITIALIZER;
    const char *dir;
    uint16_t priority;

    if (policy && !strcmp(policy, "src-ip")) {
        dir = "src";
        priority = plen * 2;
    } else {
        dir = "dst";
        priority = (plen * 2) + 1;
    }

    /* IPv6 link-local addresses must be scoped to the local router port. */
    if (!is_ipv4) {
        struct in6_addr network;
        ovs_assert(ipv6_parse(network_s, &network));
        if (in6_is_lla(&network)) {
            ds_put_format(&match, "inport == %s && ", op->json_key);
        }
    }
    ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
                  network_s, plen);

    struct ds actions = DS_EMPTY_INITIALIZER;
    /* reg0/reg1 hold the next hop and outgoing source IP for the later ARP
     * resolution stage; IPv6 uses the 128-bit xxreg variants. */
    ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");

    if (gateway) {
        ds_put_cstr(&actions, gateway);
    } else {
        ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
    }
    ds_put_format(&actions, "; "
                  "%sreg1 = %s; "
                  "eth.src = %s; "
                  "outport = %s; "
                  "flags.loopback = 1; "
                  "next;",
                  is_ipv4 ? "" : "xx",
                  lrp_addr_s,
                  op->lrp_networks.ea_s,
                  op->json_key);

    /* The priority here is calculated to implement longest-prefix-match
     * routing. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_destroy(&match);
    ds_destroy(&actions);
}
4168
/* Validates one northbound static route 'route' on datapath 'od' and, if it
 * is well formed and an output port with a usable source IP can be
 * determined, installs the corresponding routing flow via add_route().
 *
 * Malformed routes (bad next hop, bad prefix, unknown output port, or no
 * router port on the next hop's network) are logged (rate-limited) and
 * skipped. */
static void
build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
                        struct hmap *ports,
                        const struct nbrec_logical_router_static_route *route)
{
    ovs_be32 nexthop;
    const char *lrp_addr_s = NULL;
    unsigned int plen;
    bool is_ipv4;

    /* Verify that the next hop is an IP address with an all-ones mask. */
    char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
    if (!error) {
        if (plen != 32) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
            return;
        }
        is_ipv4 = true;
    } else {
        free(error);

        /* Not IPv4; try parsing the next hop as an IPv6 host address. */
        struct in6_addr ip6;
        error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
        if (!error) {
            if (plen != 128) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
                return;
            }
            is_ipv4 = false;
        } else {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
            free(error);
            return;
        }
    }

    /* Normalize the destination prefix to its masked network address;
     * 'prefix_s' is heap-allocated and freed via the cleanup label. */
    char *prefix_s;
    if (is_ipv4) {
        ovs_be32 prefix;
        /* Verify that ip prefix is a valid IPv4 address. */
        error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
        if (error) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
                         route->ip_prefix);
            free(error);
            return;
        }
        prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
    } else {
        /* Verify that ip prefix is a valid IPv6 address. */
        struct in6_addr prefix;
        error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
        if (error) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
                         route->ip_prefix);
            free(error);
            return;
        }
        struct in6_addr mask = ipv6_create_mask(plen);
        struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
        prefix_s = xmalloc(INET6_ADDRSTRLEN);
        inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
    }

    /* Find the outgoing port. */
    struct ovn_port *out_port = NULL;
    if (route->output_port) {
        out_port = ovn_port_find(ports, route->output_port);
        if (!out_port) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
                         route->output_port, route->ip_prefix);
            goto free_prefix_s;
        }
        lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
        if (!lrp_addr_s) {
            /* There are no IP networks configured on the router's port via
             * which 'route->nexthop' is theoretically reachable.  But since
             * 'out_port' has been specified, we honor it by trying to reach
             * 'route->nexthop' via the first IP address of 'out_port'.
             * (There are cases, e.g in GCE, where each VM gets a /32 IP
             * address and the default gateway is still reachable from it.) */
            if (is_ipv4) {
                if (out_port->lrp_networks.n_ipv4_addrs) {
                    lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
                }
            } else {
                if (out_port->lrp_networks.n_ipv6_addrs) {
                    lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
                }
            }
        }
    } else {
        /* output_port is not specified, find the
         * router port matching the next hop. */
        int i;
        for (i = 0; i < od->nbr->n_ports; i++) {
            struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
            out_port = ovn_port_find(ports, lrp->name);
            if (!out_port) {
                /* This should not happen. */
                continue;
            }

            lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
            if (lrp_addr_s) {
                break;
            }
        }
    }

    if (!out_port || !lrp_addr_s) {
        /* There is no matched out port. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
                     route->ip_prefix, route->nexthop);
        goto free_prefix_s;
    }

    /* Destination-based routing unless the CMS asked for source-based. */
    char *policy = route->policy ? route->policy : "dst-ip";
    add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
              policy);

free_prefix_s:
    free(prefix_s);
}
4300
4685e523 4301static void
6fdb7cd6 4302op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4685e523
JP
4303{
4304 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
4305 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
4306 return;
4307 }
4308
4309 ds_put_cstr(ds, "{");
4310 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4311 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
4312 if (add_bcast) {
4313 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
4314 }
4315 }
4316 ds_chomp(ds, ' ');
4317 ds_chomp(ds, ',');
4318 ds_put_cstr(ds, "}");
4319}
4320
6fdb7cd6
JP
4321static void
4322op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
4323{
4324 if (op->lrp_networks.n_ipv6_addrs == 1) {
4325 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
4326 return;
4327 }
4328
4329 ds_put_cstr(ds, "{");
4330 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4331 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
4332 }
4333 ds_chomp(ds, ' ');
4334 ds_chomp(ds, ',');
4335 ds_put_cstr(ds, "}");
4336}
4337
65d8810c
GS
4338static const char *
4339get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
4340{
4341 char *key = xasprintf("%s_force_snat_ip", key_type);
4342 const char *ip_address = smap_get(&od->nbr->options, key);
4343 free(key);
4344
4345 if (ip_address) {
4346 ovs_be32 mask;
4347 char *error = ip_parse_masked(ip_address, ip, &mask);
4348 if (error || mask != OVS_BE32_MAX) {
4349 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4350 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
4351 ip_address, UUID_ARGS(&od->key));
4352 free(error);
4353 *ip = 0;
4354 return NULL;
4355 }
4356 return ip_address;
4357 }
4358
4359 *ip = 0;
4360 return NULL;
4361}
4362
4363static void
4364add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
4365 struct ds *match, struct ds *actions, int priority,
6f39e18d 4366 const char *lb_force_snat_ip, char *backend_ips,
485d373b 4367 bool is_udp, int addr_family)
65d8810c
GS
4368{
4369 /* A match and actions for new connections. */
4370 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
4371 if (lb_force_snat_ip) {
4372 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
4373 ds_cstr(actions));
4374 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4375 new_actions);
4376 free(new_actions);
4377 } else {
4378 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
4379 ds_cstr(actions));
4380 }
4381
4382 /* A match and actions for established connections. */
4383 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
4384 if (lb_force_snat_ip) {
4385 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4386 "flags.force_snat_for_lb = 1; ct_dnat;");
4387 } else {
4388 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
4389 "ct_dnat;");
4390 }
4391
4392 free(new_match);
4393 free(est_match);
6f39e18d 4394
485d373b
MM
4395 if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips
4396 || addr_family != AF_INET) {
6f39e18d
NS
4397 return;
4398 }
4399
4400 /* Add logical flows to UNDNAT the load balanced reverse traffic in
4401 * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical
4402 * router has a gateway router port associated.
4403 */
4404 struct ds undnat_match = DS_EMPTY_INITIALIZER;
4405 ds_put_cstr(&undnat_match, "ip4 && (");
4406 char *start, *next, *ip_str;
4407 start = next = xstrdup(backend_ips);
4408 ip_str = strsep(&next, ",");
4409 bool backend_ips_found = false;
4410 while (ip_str && ip_str[0]) {
4411 char *ip_address = NULL;
4412 uint16_t port = 0;
485d373b
MM
4413 int addr_family;
4414 ip_address_and_port_from_lb_key(ip_str, &ip_address, &port,
4415 &addr_family);
6f39e18d
NS
4416 if (!ip_address) {
4417 break;
4418 }
4419
4420 ds_put_format(&undnat_match, "(ip4.src == %s", ip_address);
4421 free(ip_address);
4422 if (port) {
4423 ds_put_format(&undnat_match, " && %s.src == %d) || ",
4424 is_udp ? "udp" : "tcp", port);
4425 } else {
4426 ds_put_cstr(&undnat_match, ") || ");
4427 }
4428 ip_str = strsep(&next, ",");
4429 backend_ips_found = true;
4430 }
4431
4432 free(start);
4433 if (!backend_ips_found) {
4434 ds_destroy(&undnat_match);
4435 return;
4436 }
4437 ds_chomp(&undnat_match, ' ');
4438 ds_chomp(&undnat_match, '|');
4439 ds_chomp(&undnat_match, '|');
4440 ds_chomp(&undnat_match, ' ');
4441 ds_put_format(&undnat_match, ") && outport == %s && "
4442 "is_chassis_resident(%s)", od->l3dgw_port->json_key,
4443 od->l3redirect_port->json_key);
4444 if (lb_force_snat_ip) {
4445 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
4446 ds_cstr(&undnat_match),
4447 "flags.force_snat_for_lb = 1; ct_dnat;");
4448 } else {
4449 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120,
4450 ds_cstr(&undnat_match), "ct_dnat;");
4451 }
4452
4453 ds_destroy(&undnat_match);
65d8810c
GS
4454}
4455
9975d7be
BP
4456static void
4457build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
4458 struct hmap *lflows)
4459{
4460 /* This flow table structure is documented in ovn-northd(8), so please
4461 * update ovn-northd.8.xml if you change anything. */
4462
09b39248
JP
4463 struct ds match = DS_EMPTY_INITIALIZER;
4464 struct ds actions = DS_EMPTY_INITIALIZER;
4465
9975d7be
BP
4466 /* Logical router ingress table 0: Admission control framework. */
4467 struct ovn_datapath *od;
4468 HMAP_FOR_EACH (od, key_node, datapaths) {
4469 if (!od->nbr) {
4470 continue;
4471 }
4472
4473 /* Logical VLANs not supported.
4474 * Broadcast/multicast source address is invalid. */
4475 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
4476 "vlan.present || eth.src[40]", "drop;");
4477 }
4478
4479 /* Logical router ingress table 0: match (priority 50). */
4480 struct ovn_port *op;
4481 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4482 if (!op->nbrp) {
9975d7be
BP
4483 continue;
4484 }
4485
0ee00741 4486 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
4487 /* Drop packets from disabled logical ports (since logical flow
4488 * tables are default-drop). */
4489 continue;
4490 }
4491
41a15b71
MS
4492 if (op->derived) {
4493 /* No ingress packets should be received on a chassisredirect
4494 * port. */
4495 continue;
4496 }
4497
4498 ds_clear(&match);
4499 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
4500 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
4501 ds_cstr(&match), "next;");
4502
09b39248 4503 ds_clear(&match);
41a15b71 4504 ds_put_format(&match, "eth.dst == %s && inport == %s",
4685e523 4505 op->lrp_networks.ea_s, op->json_key);
41a15b71
MS
4506 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4507 && op->od->l3redirect_port) {
4508 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
4509 * should only be received on the "redirect-chassis". */
4510 ds_put_format(&match, " && is_chassis_resident(%s)",
4511 op->od->l3redirect_port->json_key);
4512 }
9975d7be 4513 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 4514 ds_cstr(&match), "next;");
9975d7be
BP
4515 }
4516
4517 /* Logical router ingress table 1: IP Input. */
78aab811 4518 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
4519 if (!od->nbr) {
4520 continue;
4521 }
4522
4523 /* L3 admission control: drop multicast and broadcast source, localhost
4524 * source or destination, and zero network source or destination
4525 * (priority 100). */
4526 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
4527 "ip4.mcast || "
4528 "ip4.src == 255.255.255.255 || "
4529 "ip4.src == 127.0.0.0/8 || "
4530 "ip4.dst == 127.0.0.0/8 || "
4531 "ip4.src == 0.0.0.0/8 || "
4532 "ip4.dst == 0.0.0.0/8",
4533 "drop;");
4534
0bac7164
BP
4535 /* ARP reply handling. Use ARP replies to populate the logical
4536 * router's ARP table. */
4537 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
4538 "put_arp(inport, arp.spa, arp.sha);");
4539
9975d7be
BP
4540 /* Drop Ethernet local broadcast. By definition this traffic should
4541 * not be forwarded.*/
4542 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
4543 "eth.bcast", "drop;");
4544
9975d7be
BP
4545 /* TTL discard.
4546 *
4547 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
4548 ds_clear(&match);
4549 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
4550 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
4551 ds_cstr(&match), "drop;");
9975d7be 4552
c34a87b6
JP
4553 /* ND advertisement handling. Use advertisements to populate
4554 * the logical router's ARP/ND table. */
4555 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
4556 "put_nd(inport, nd.target, nd.tll);");
4557
4558 /* Lean from neighbor solicitations that were not directed at
4559 * us. (A priority-90 flow will respond to requests to us and
4560 * learn the sender's mac address. */
4561 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
4562 "put_nd(inport, ip6.src, nd.sll);");
4563
9975d7be
BP
4564 /* Pass other traffic not already handled to the next table for
4565 * routing. */
4566 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
4567 }
4568
6fdb7cd6 4569 /* Logical router ingress table 1: IP Input for IPv4. */
9975d7be 4570 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4571 if (!op->nbrp) {
9975d7be
BP
4572 continue;
4573 }
4574
41a15b71
MS
4575 if (op->derived) {
4576 /* No ingress packets are accepted on a chassisredirect
4577 * port, so no need to program flows for that port. */
4578 continue;
4579 }
9975d7be 4580
6fdb7cd6
JP
4581 if (op->lrp_networks.n_ipv4_addrs) {
4582 /* L3 admission control: drop packets that originate from an
4583 * IPv4 address owned by the router or a broadcast address
4584 * known to the router (priority 100). */
4585 ds_clear(&match);
4586 ds_put_cstr(&match, "ip4.src == ");
4587 op_put_v4_networks(&match, op, true);
06a26dd2 4588 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
6fdb7cd6
JP
4589 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4590 ds_cstr(&match), "drop;");
4591
4592 /* ICMP echo reply. These flows reply to ICMP echo requests
4593 * received for the router's IP address. Since packets only
4594 * get here as part of the logical router datapath, the inport
4595 * (i.e. the incoming locally attached net) does not matter.
4596 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
4597 ds_clear(&match);
4598 ds_put_cstr(&match, "ip4.dst == ");
4599 op_put_v4_networks(&match, op, false);
4600 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
4601
4602 ds_clear(&actions);
4603 ds_put_format(&actions,
4604 "ip4.dst <-> ip4.src; "
4605 "ip.ttl = 255; "
4606 "icmp4.type = 0; "
bf143492 4607 "flags.loopback = 1; "
6fdb7cd6
JP
4608 "next; ");
4609 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4610 ds_cstr(&match), ds_cstr(&actions));
4611 }
dd7652e6 4612
9975d7be
BP
4613 /* ARP reply. These flows reply to ARP requests for the router's own
4614 * IP address. */
4685e523
JP
4615 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4616 ds_clear(&match);
4617 ds_put_format(&match,
4618 "inport == %s && arp.tpa == %s && arp.op == 1",
4619 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
41a15b71
MS
4620 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4621 && op->od->l3redirect_port) {
4622 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4623 * should only be sent from the "redirect-chassis", so that
4624 * upstream MAC learning points to the "redirect-chassis".
4625 * Also need to avoid generation of multiple ARP responses
4626 * from different chassis. */
4627 ds_put_format(&match, " && is_chassis_resident(%s)",
4628 op->od->l3redirect_port->json_key);
4629 }
4685e523
JP
4630
4631 ds_clear(&actions);
4632 ds_put_format(&actions,
4633 "eth.dst = eth.src; "
4634 "eth.src = %s; "
4635 "arp.op = 2; /* ARP reply */ "
4636 "arp.tha = arp.sha; "
4637 "arp.sha = %s; "
4638 "arp.tpa = arp.spa; "
4639 "arp.spa = %s; "
4640 "outport = %s; "
bf143492 4641 "flags.loopback = 1; "
4685e523
JP
4642 "output;",
4643 op->lrp_networks.ea_s,
4644 op->lrp_networks.ea_s,
4645 op->lrp_networks.ipv4_addrs[i].addr_s,
4646 op->json_key);
4647 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4648 ds_cstr(&match), ds_cstr(&actions));
4649 }
9975d7be 4650
cc4583aa
GS
4651 /* A set to hold all load-balancer vips that need ARP responses. */
4652 struct sset all_ips = SSET_INITIALIZER(&all_ips);
485d373b
MM
4653 int addr_family;
4654 get_router_load_balancer_ips(op->od, &all_ips, &addr_family);
cc4583aa
GS
4655
4656 const char *ip_address;
4657 SSET_FOR_EACH(ip_address, &all_ips) {
cc4583aa 4658 ds_clear(&match);
485d373b
MM
4659 if (addr_family == AF_INET) {
4660 ds_put_format(&match,
4661 "inport == %s && arp.tpa == %s && arp.op == 1",
4662 op->json_key, ip_address);
4663 } else {
4664 ds_put_format(&match,
4665 "inport == %s && nd_ns && nd.target == %s",
4666 op->json_key, ip_address);
4667 }
cc4583aa
GS
4668
4669 ds_clear(&actions);
485d373b
MM
4670 if (addr_family == AF_INET) {
4671 ds_put_format(&actions,
cc4583aa
GS
4672 "eth.dst = eth.src; "
4673 "eth.src = %s; "
4674 "arp.op = 2; /* ARP reply */ "
4675 "arp.tha = arp.sha; "
4676 "arp.sha = %s; "
4677 "arp.tpa = arp.spa; "
485d373b 4678 "arp.spa = %s; "
cc4583aa
GS
4679 "outport = %s; "
4680 "flags.loopback = 1; "
4681 "output;",
4682 op->lrp_networks.ea_s,
4683 op->lrp_networks.ea_s,
485d373b 4684 ip_address,
cc4583aa 4685 op->json_key);
485d373b
MM
4686 } else {
4687 ds_put_format(&actions,
4688 "nd_na { "
4689 "eth.src = %s; "
4690 "ip6.src = %s; "
4691 "nd.target = %s; "
4692 "nd.tll = %s; "
4693 "outport = inport; "
4694 "flags.loopback = 1; "
4695 "output; "
4696 "};",
4697 op->lrp_networks.ea_s,
4698 ip_address,
4699 ip_address,
4700 op->lrp_networks.ea_s);
4701 }
cc4583aa
GS
4702 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4703 ds_cstr(&match), ds_cstr(&actions));
4704 }
4705
4706 sset_destroy(&all_ips);
4707
65d8810c
GS
4708 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
4709 * LBed traffic respectively to be SNATed. In addition, there can be
4710 * a number of SNAT rules in the NAT table. */
4711 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
4712 (op->od->nbr->n_nat + 2));
dde5ea7b 4713 size_t n_snat_ips = 0;
65d8810c
GS
4714
4715 ovs_be32 snat_ip;
4716 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
4717 &snat_ip);
4718 if (dnat_force_snat_ip) {
4719 snat_ips[n_snat_ips++] = snat_ip;
4720 }
4721
4722 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
4723 &snat_ip);
4724 if (lb_force_snat_ip) {
4725 snat_ips[n_snat_ips++] = snat_ip;
4726 }
4727
de297547
GS
4728 for (int i = 0; i < op->od->nbr->n_nat; i++) {
4729 const struct nbrec_nat *nat;
4730
4731 nat = op->od->nbr->nat[i];
4732
de297547
GS
4733 ovs_be32 ip;
4734 if (!ip_parse(nat->external_ip, &ip) || !ip) {
4735 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
dde5ea7b 4736 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
de297547
GS
4737 "for router %s", nat->external_ip, op->key);
4738 continue;
4739 }
4740
dde5ea7b
GS
4741 if (!strcmp(nat->type, "snat")) {
4742 snat_ips[n_snat_ips++] = ip;
4743 continue;
4744 }
4745
4746 /* ARP handling for external IP addresses.
4747 *
4748 * DNAT IP addresses are external IP addresses that need ARP
4749 * handling. */
09b39248
JP
4750 ds_clear(&match);
4751 ds_put_format(&match,
4752 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
4753 op->json_key, IP_ARGS(ip));
4685e523 4754
09b39248
JP
4755 ds_clear(&actions);
4756 ds_put_format(&actions,
de297547 4757 "eth.dst = eth.src; "
de297547 4758 "arp.op = 2; /* ARP reply */ "
06a26dd2
MS
4759 "arp.tha = arp.sha; ");
4760
4761 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
4762 struct eth_addr mac;
4763 if (nat->external_mac &&
4764 eth_addr_from_string(nat->external_mac, &mac)
4765 && nat->logical_port) {
4766 /* distributed NAT case, use nat->external_mac */
4767 ds_put_format(&actions,
4768 "eth.src = "ETH_ADDR_FMT"; "
4769 "arp.sha = "ETH_ADDR_FMT"; ",
4770 ETH_ADDR_ARGS(mac),
4771 ETH_ADDR_ARGS(mac));
4772 /* Traffic with eth.src = nat->external_mac should only be
4773 * sent from the chassis where nat->logical_port is
4774 * resident, so that upstream MAC learning points to the
4775 * correct chassis. Also need to avoid generation of
4776 * multiple ARP responses from different chassis. */
4777 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
4778 nat->logical_port);
4779 } else {
4780 ds_put_format(&actions,
4781 "eth.src = %s; "
4782 "arp.sha = %s; ",
4783 op->lrp_networks.ea_s,
4784 op->lrp_networks.ea_s);
4785 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4786 * should only be sent from the "redirect-chassis", so that
4787 * upstream MAC learning points to the "redirect-chassis".
4788 * Also need to avoid generation of multiple ARP responses
4789 * from different chassis. */
4790 if (op->od->l3redirect_port) {
4791 ds_put_format(&match, " && is_chassis_resident(%s)",
4792 op->od->l3redirect_port->json_key);
4793 }
4794 }
4795 } else {
4796 ds_put_format(&actions,
4797 "eth.src = %s; "
4798 "arp.sha = %s; ",
4799 op->lrp_networks.ea_s,
4800 op->lrp_networks.ea_s);
4801 }
4802 ds_put_format(&actions,
de297547
GS
4803 "arp.tpa = arp.spa; "
4804 "arp.spa = "IP_FMT"; "
4805 "outport = %s; "
bf143492 4806 "flags.loopback = 1; "
de297547 4807 "output;",
de297547
GS
4808 IP_ARGS(ip),
4809 op->json_key);
4810 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 4811 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
4812 }
4813
4685e523
JP
4814 ds_clear(&match);
4815 ds_put_cstr(&match, "ip4.dst == {");
4816 bool has_drop_ips = false;
4817 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
49da9ec0 4818 bool snat_ip_is_router_ip = false;
dde5ea7b
GS
4819 for (int j = 0; j < n_snat_ips; j++) {
4820 /* Packets to SNAT IPs should not be dropped. */
4821 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
49da9ec0
CSV
4822 snat_ip_is_router_ip = true;
4823 break;
4685e523 4824 }
4ef48e9d 4825 }
49da9ec0
CSV
4826 if (snat_ip_is_router_ip) {
4827 continue;
4828 }
4685e523
JP
4829 ds_put_format(&match, "%s, ",
4830 op->lrp_networks.ipv4_addrs[i].addr_s);
4831 has_drop_ips = true;
4ef48e9d 4832 }
4685e523
JP
4833 ds_chomp(&match, ' ');
4834 ds_chomp(&match, ',');
4835 ds_put_cstr(&match, "}");
4ef48e9d 4836
4685e523
JP
4837 if (has_drop_ips) {
4838 /* Drop IP traffic to this router. */
09b39248
JP
4839 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
4840 ds_cstr(&match), "drop;");
4ef48e9d 4841 }
4685e523 4842
dde5ea7b 4843 free(snat_ips);
9975d7be
BP
4844 }
4845
6fdb7cd6
JP
4846 /* Logical router ingress table 1: IP Input for IPv6. */
4847 HMAP_FOR_EACH (op, key_node, ports) {
4848 if (!op->nbrp) {
4849 continue;
4850 }
4851
41a15b71
MS
4852 if (op->derived) {
4853 /* No ingress packets are accepted on a chassisredirect
4854 * port, so no need to program flows for that port. */
4855 continue;
4856 }
4857
6fdb7cd6
JP
4858 if (op->lrp_networks.n_ipv6_addrs) {
4859 /* L3 admission control: drop packets that originate from an
4860 * IPv6 address owned by the router (priority 100). */
4861 ds_clear(&match);
4862 ds_put_cstr(&match, "ip6.src == ");
4863 op_put_v6_networks(&match, op);
4864 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4865 ds_cstr(&match), "drop;");
4866
4867 /* ICMPv6 echo reply. These flows reply to echo requests
4868 * received for the router's IP address. */
4869 ds_clear(&match);
4870 ds_put_cstr(&match, "ip6.dst == ");
4871 op_put_v6_networks(&match, op);
4872 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
4873
4874 ds_clear(&actions);
4875 ds_put_cstr(&actions,
4876 "ip6.dst <-> ip6.src; "
4877 "ip.ttl = 255; "
4878 "icmp6.type = 129; "
bf143492 4879 "flags.loopback = 1; "
6fdb7cd6
JP
4880 "next; ");
4881 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4882 ds_cstr(&match), ds_cstr(&actions));
4883
4884 /* Drop IPv6 traffic to this router. */
4885 ds_clear(&match);
4886 ds_put_cstr(&match, "ip6.dst == ");
4887 op_put_v6_networks(&match, op);
4888 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
4889 ds_cstr(&match), "drop;");
4890 }
4891
4892 /* ND reply. These flows reply to ND solicitations for the
4893 * router's own IP address. */
4894 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4895 ds_clear(&match);
4896 ds_put_format(&match,
4897 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
4898 "&& nd.target == %s",
4899 op->json_key,
4900 op->lrp_networks.ipv6_addrs[i].addr_s,
4901 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
4902 op->lrp_networks.ipv6_addrs[i].addr_s);
41a15b71
MS
4903 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4904 && op->od->l3redirect_port) {
4905 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4906 * should only be sent from the "redirect-chassis", so that
4907 * upstream MAC learning points to the "redirect-chassis".
4908 * Also need to avoid generation of multiple ND replies
4909 * from different chassis. */
4910 ds_put_format(&match, " && is_chassis_resident(%s)",
4911 op->od->l3redirect_port->json_key);
4912 }
6fdb7cd6
JP
4913
4914 ds_clear(&actions);
4915 ds_put_format(&actions,
c34a87b6 4916 "put_nd(inport, ip6.src, nd.sll); "
6fdb7cd6
JP
4917 "nd_na { "
4918 "eth.src = %s; "
4919 "ip6.src = %s; "
4920 "nd.target = %s; "
4921 "nd.tll = %s; "
4922 "outport = inport; "
bf143492 4923 "flags.loopback = 1; "
6fdb7cd6
JP
4924 "output; "
4925 "};",
4926 op->lrp_networks.ea_s,
4927 op->lrp_networks.ipv6_addrs[i].addr_s,
4928 op->lrp_networks.ipv6_addrs[i].addr_s,
4929 op->lrp_networks.ea_s);
4930 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4931 ds_cstr(&match), ds_cstr(&actions));
4932 }
4933 }
4934
06a26dd2 4935 /* NAT, Defrag and load balancing. */
de297547
GS
4936 HMAP_FOR_EACH (od, key_node, datapaths) {
4937 if (!od->nbr) {
4938 continue;
4939 }
4940
4941 /* Packets are allowed by default. */
cc4583aa 4942 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
de297547
GS
4943 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
4944 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
4945 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
06a26dd2
MS
4946 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
4947 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
de297547 4948
06a26dd2
MS
4949 /* NAT rules are only valid on Gateway routers and routers with
4950 * l3dgw_port (router has a port with "redirect-chassis"
4951 * specified). */
4952 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
de297547
GS
4953 continue;
4954 }
4955
65d8810c
GS
4956 ovs_be32 snat_ip;
4957 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
4958 &snat_ip);
4959 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
4960 &snat_ip);
4961
de297547
GS
4962 for (int i = 0; i < od->nbr->n_nat; i++) {
4963 const struct nbrec_nat *nat;
4964
4965 nat = od->nbr->nat[i];
4966
4967 ovs_be32 ip, mask;
4968
4969 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
4970 if (error || mask != OVS_BE32_MAX) {
4971 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4972 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
4973 nat->external_ip);
4974 free(error);
4975 continue;
4976 }
4977
4978 /* Check the validity of nat->logical_ip. 'logical_ip' can
4979 * be a subnet when the type is "snat". */
4980 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
4981 if (!strcmp(nat->type, "snat")) {
4982 if (error) {
4983 static struct vlog_rate_limit rl =
4984 VLOG_RATE_LIMIT_INIT(5, 1);
4985 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
4986 "in router "UUID_FMT"",
4987 nat->logical_ip, UUID_ARGS(&od->key));
4988 free(error);
4989 continue;
4990 }
4991 } else {
4992 if (error || mask != OVS_BE32_MAX) {
4993 static struct vlog_rate_limit rl =
4994 VLOG_RATE_LIMIT_INIT(5, 1);
4995 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
4996 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
4997 free(error);
4998 continue;
4999 }
5000 }
5001
06a26dd2
MS
5002 /* For distributed router NAT, determine whether this NAT rule
5003 * satisfies the conditions for distributed NAT processing. */
5004 bool distributed = false;
5005 struct eth_addr mac;
5006 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
5007 nat->logical_port && nat->external_mac) {
5008 if (eth_addr_from_string(nat->external_mac, &mac)) {
5009 distributed = true;
5010 } else {
5011 static struct vlog_rate_limit rl =
5012 VLOG_RATE_LIMIT_INIT(5, 1);
5013 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
5014 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
5015 continue;
5016 }
5017 }
5018
de297547
GS
5019 /* Ingress UNSNAT table: It is for already established connections'
5020 * reverse traffic. i.e., SNAT has already been done in egress
5021 * pipeline and now the packet has entered the ingress pipeline as
5022 * part of a reply. We undo the SNAT here.
5023 *
5024 * Undoing SNAT has to happen before DNAT processing. This is
5025 * because when the packet was DNATed in ingress pipeline, it did
5026 * not know about the possibility of eventual additional SNAT in
5027 * egress pipeline. */
5028 if (!strcmp(nat->type, "snat")
5029 || !strcmp(nat->type, "dnat_and_snat")) {
06a26dd2
MS
5030 if (!od->l3dgw_port) {
5031 /* Gateway router. */
5032 ds_clear(&match);
5033 ds_put_format(&match, "ip && ip4.dst == %s",
5034 nat->external_ip);
5035 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
5036 ds_cstr(&match), "ct_snat; next;");
5037 } else {
5038 /* Distributed router. */
5039
5040 /* Traffic received on l3dgw_port is subject to NAT. */
5041 ds_clear(&match);
5042 ds_put_format(&match, "ip && ip4.dst == %s"
5043 " && inport == %s",
5044 nat->external_ip,
5045 od->l3dgw_port->json_key);
5046 if (!distributed && od->l3redirect_port) {
5047 /* Flows for NAT rules that are centralized are only
5048 * programmed on the "redirect-chassis". */
5049 ds_put_format(&match, " && is_chassis_resident(%s)",
5050 od->l3redirect_port->json_key);
5051 }
5052 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5053 ds_cstr(&match), "ct_snat;");
5054
5055 /* Traffic received on other router ports must be
5056 * redirected to the central instance of the l3dgw_port
5057 * for NAT processing. */
5058 ds_clear(&match);
5059 ds_put_format(&match, "ip && ip4.dst == %s",
5060 nat->external_ip);
5061 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
5062 ds_cstr(&match),
5063 REGBIT_NAT_REDIRECT" = 1; next;");
5064 }
de297547
GS
5065 }
5066
5067 /* Ingress DNAT table: Packets enter the pipeline with destination
5068 * IP address that needs to be DNATted from a external IP address
5069 * to a logical IP address. */
5070 if (!strcmp(nat->type, "dnat")
5071 || !strcmp(nat->type, "dnat_and_snat")) {
06a26dd2
MS
5072 if (!od->l3dgw_port) {
5073 /* Gateway router. */
5074 /* Packet when it goes from the initiator to destination.
5075 * We need to set flags.loopback because the router can
5076 * send the packet back through the same interface. */
5077 ds_clear(&match);
5078 ds_put_format(&match, "ip && ip4.dst == %s",
5079 nat->external_ip);
5080 ds_clear(&actions);
5081 if (dnat_force_snat_ip) {
5082 /* Indicate to the future tables that a DNAT has taken
5083 * place and a force SNAT needs to be done in the
5084 * Egress SNAT table. */
5085 ds_put_format(&actions,
5086 "flags.force_snat_for_dnat = 1; ");
5087 }
5088 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
5089 nat->logical_ip);
5090 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5091 ds_cstr(&match), ds_cstr(&actions));
5092 } else {
5093 /* Distributed router. */
5094
5095 /* Traffic received on l3dgw_port is subject to NAT. */
5096 ds_clear(&match);
5097 ds_put_format(&match, "ip && ip4.dst == %s"
5098 " && inport == %s",
5099 nat->external_ip,
5100 od->l3dgw_port->json_key);
5101 if (!distributed && od->l3redirect_port) {
5102 /* Flows for NAT rules that are centralized are only
5103 * programmed on the "redirect-chassis". */
5104 ds_put_format(&match, " && is_chassis_resident(%s)",
5105 od->l3redirect_port->json_key);
5106 }
5107 ds_clear(&actions);
5108 ds_put_format(&actions, "ct_dnat(%s);",
5109 nat->logical_ip);
5110 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
5111 ds_cstr(&match), ds_cstr(&actions));
5112
5113 /* Traffic received on other router ports must be
5114 * redirected to the central instance of the l3dgw_port
5115 * for NAT processing. */
5116 ds_clear(&match);
5117 ds_put_format(&match, "ip && ip4.dst == %s",
5118 nat->external_ip);
5119 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5120 ds_cstr(&match),
5121 REGBIT_NAT_REDIRECT" = 1; next;");
5122 }
5123 }
5124
5125 /* Egress UNDNAT table: It is for already established connections'
5126 * reverse traffic. i.e., DNAT has already been done in ingress
5127 * pipeline and now the packet has entered the egress pipeline as
5128 * part of a reply. We undo the DNAT here.
5129 *
5130 * Note that this only applies for NAT on a distributed router.
5131 * Undo DNAT on a gateway router is done in the ingress DNAT
5132 * pipeline stage. */
5133 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
5134 || !strcmp(nat->type, "dnat_and_snat"))) {
09b39248 5135 ds_clear(&match);
06a26dd2
MS
5136 ds_put_format(&match, "ip && ip4.src == %s"
5137 " && outport == %s",
5138 nat->logical_ip,
5139 od->l3dgw_port->json_key);
5140 if (!distributed && od->l3redirect_port) {
5141 /* Flows for NAT rules that are centralized are only
5142 * programmed on the "redirect-chassis". */
5143 ds_put_format(&match, " && is_chassis_resident(%s)",
5144 od->l3redirect_port->json_key);
5145 }
09b39248 5146 ds_clear(&actions);
06a26dd2
MS
5147 if (distributed) {
5148 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5149 ETH_ADDR_ARGS(mac));
65d8810c 5150 }
06a26dd2
MS
5151 ds_put_format(&actions, "ct_dnat;");
5152 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
09b39248 5153 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
5154 }
5155
5156 /* Egress SNAT table: Packets enter the egress pipeline with
5157 * source ip address that needs to be SNATted to a external ip
5158 * address. */
5159 if (!strcmp(nat->type, "snat")
5160 || !strcmp(nat->type, "dnat_and_snat")) {
06a26dd2
MS
5161 if (!od->l3dgw_port) {
5162 /* Gateway router. */
5163 ds_clear(&match);
5164 ds_put_format(&match, "ip && ip4.src == %s",
5165 nat->logical_ip);
5166 ds_clear(&actions);
5167 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5168
5169 /* The priority here is calculated such that the
5170 * nat->logical_ip with the longest mask gets a higher
5171 * priority. */
5172 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5173 count_1bits(ntohl(mask)) + 1,
5174 ds_cstr(&match), ds_cstr(&actions));
5175 } else {
5176 /* Distributed router. */
5177 ds_clear(&match);
5178 ds_put_format(&match, "ip && ip4.src == %s"
5179 " && outport == %s",
5180 nat->logical_ip,
5181 od->l3dgw_port->json_key);
5182 if (!distributed && od->l3redirect_port) {
5183 /* Flows for NAT rules that are centralized are only
5184 * programmed on the "redirect-chassis". */
5185 ds_put_format(&match, " && is_chassis_resident(%s)",
5186 od->l3redirect_port->json_key);
5187 }
5188 ds_clear(&actions);
5189 if (distributed) {
5190 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
5191 ETH_ADDR_ARGS(mac));
5192 }
5193 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
5194
5195 /* The priority here is calculated such that the
5196 * nat->logical_ip with the longest mask gets a higher
5197 * priority. */
5198 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
5199 count_1bits(ntohl(mask)) + 1,
5200 ds_cstr(&match), ds_cstr(&actions));
5201 }
5202 }
5203
5204 /* Logical router ingress table 0:
5205 * For NAT on a distributed router, add rules allowing
5206 * ingress traffic with eth.dst matching nat->external_mac
5207 * on the l3dgw_port instance where nat->logical_port is
5208 * resident. */
5209 if (distributed) {
09b39248 5210 ds_clear(&match);
06a26dd2
MS
5211 ds_put_format(&match,
5212 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
5213 " && is_chassis_resident(\"%s\")",
5214 ETH_ADDR_ARGS(mac),
5215 od->l3dgw_port->json_key,
5216 nat->logical_port);
5217 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
5218 ds_cstr(&match), "next;");
5219 }
5220
5221 /* Ingress Gateway Redirect Table: For NAT on a distributed
5222 * router, add flows that are specific to a NAT rule. These
5223 * flows indicate the presence of an applicable NAT rule that
5224 * can be applied in a distributed manner. */
5225 if (distributed) {
5226 ds_clear(&match);
5227 ds_put_format(&match, "ip4.src == %s && outport == %s",
5228 nat->logical_ip,
5229 od->l3dgw_port->json_key);
5230 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
5231 ds_cstr(&match), "next;");
5232 }
de297547 5233
06a26dd2
MS
5234 /* Egress Loopback table: For NAT on a distributed router.
5235 * If packets in the egress pipeline on the distributed
5236 * gateway port have ip.dst matching a NAT external IP, then
5237 * loop a clone of the packet back to the beginning of the
5238 * ingress pipeline with inport = outport. */
5239 if (od->l3dgw_port) {
5240 /* Distributed router. */
5241 ds_clear(&match);
5242 ds_put_format(&match, "ip4.dst == %s && outport == %s",
5243 nat->external_ip,
5244 od->l3dgw_port->json_key);
5245 ds_clear(&actions);
5246 ds_put_format(&actions,
5247 "clone { ct_clear; "
5248 "inport = outport; outport = \"\"; "
5249 "flags = 0; flags.loopback = 1; ");
71f21279
BP
5250 for (int j = 0; j < MFF_N_LOG_REGS; j++) {
5251 ds_put_format(&actions, "reg%d = 0; ", j);
06a26dd2
MS
5252 }
5253 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
5254 "next(pipeline=ingress, table=0); };");
5255 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
09b39248 5256 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
5257 }
5258 }
5259
65d8810c 5260 /* Handle force SNAT options set in the gateway router. */
06a26dd2 5261 if (dnat_force_snat_ip && !od->l3dgw_port) {
65d8810c
GS
5262 /* If a packet with destination IP address as that of the
5263 * gateway router (as set in options:dnat_force_snat_ip) is seen,
5264 * UNSNAT it. */
5265 ds_clear(&match);
5266 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
5267 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
5268 ds_cstr(&match), "ct_snat; next;");
5269
5270 /* Higher priority rules to force SNAT with the IP addresses
5271 * configured in the Gateway router. This only takes effect
5272 * when the packet has already been DNATed once. */
5273 ds_clear(&match);
5274 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
5275 ds_clear(&actions);
5276 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
5277 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
5278 ds_cstr(&match), ds_cstr(&actions));
5279 }
06a26dd2 5280 if (lb_force_snat_ip && !od->l3dgw_port) {
65d8810c
GS
5281 /* If a packet with destination IP address as that of the
5282 * gateway router (as set in options:lb_force_snat_ip) is seen,
5283 * UNSNAT it. */
5284 ds_clear(&match);
5285 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
5286 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
5287 ds_cstr(&match), "ct_snat; next;");
5288
5289 /* Load balanced traffic will have flags.force_snat_for_lb set.
5290 * Force SNAT it. */
5291 ds_clear(&match);
5292 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
5293 ds_clear(&actions);
5294 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
5295 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
5296 ds_cstr(&match), ds_cstr(&actions));
5297 }
5298
06a26dd2
MS
5299 if (!od->l3dgw_port) {
5300 /* For gateway router, re-circulate every packet through
5301 * the DNAT zone. This helps with two things.
5302 *
5303 * 1. Any packet that needs to be unDNATed in the reverse
5304 * direction gets unDNATed. Ideally this could be done in
5305 * the egress pipeline. But since the gateway router
5306 * does not have any feature that depends on the source
5307 * ip address being external IP address for IP routing,
5308 * we can do it here, saving a future re-circulation.
5309 *
5310 * 2. Any packet that was sent through SNAT zone in the
5311 * previous table automatically gets re-circulated to get
5312 * back the new destination IP address that is needed for
5313 * routing in the openflow pipeline. */
5314 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
5315 "ip", "flags.loopback = 1; ct_dnat;");
5316 } else {
5317 /* For NAT on a distributed router, add flows to Ingress
5318 * IP Routing table, Ingress ARP Resolution table, and
5319 * Ingress Gateway Redirect Table that are not specific to a
5320 * NAT rule. */
5321
5322 /* The highest priority IN_IP_ROUTING rule matches packets
5323 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5324 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
5325 * will take care of setting the outport. */
5326 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
5327 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
5328
5329 /* The highest priority IN_ARP_RESOLVE rule matches packets
5330 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5331 * then sets eth.dst to the distributed gateway port's
5332 * ethernet address. */
5333 ds_clear(&actions);
5334 ds_put_format(&actions, "eth.dst = %s; next;",
5335 od->l3dgw_port->lrp_networks.ea_s);
5336 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
5337 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
5338
5339 /* The highest priority IN_GW_REDIRECT rule redirects packets
5340 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
5341 * the central instance of the l3dgw_port for NAT processing. */
5342 ds_clear(&actions);
5343 ds_put_format(&actions, "outport = %s; next;",
5344 od->l3redirect_port->json_key);
5345 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
5346 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
5347 }
5348
5349 /* Load balancing and packet defrag are only valid on
6f39e18d
NS
5350 * Gateway routers or router with gateway port. */
5351 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
06a26dd2
MS
5352 continue;
5353 }
8697d426
MS
5354
5355 /* A set to hold all ips that need defragmentation and tracking. */
5356 struct sset all_ips = SSET_INITIALIZER(&all_ips);
5357
5358 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
5359 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
5360 struct smap *vips = &lb->vips;
5361 struct smap_node *node;
5362
5363 SMAP_FOR_EACH (node, vips) {
5364 uint16_t port = 0;
485d373b 5365 int addr_family;
8697d426
MS
5366
5367 /* node->key contains IP:port or just IP. */
5368 char *ip_address = NULL;
485d373b
MM
5369 ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
5370 &addr_family);
8697d426
MS
5371 if (!ip_address) {
5372 continue;
5373 }
5374
5375 if (!sset_contains(&all_ips, ip_address)) {
5376 sset_add(&all_ips, ip_address);
485d373b
MM
5377 /* If there are any load balancing rules, we should send
5378 * the packet to conntrack for defragmentation and
5379 * tracking. This helps with two things.
5380 *
5381 * 1. With tracking, we can send only new connections to
5382 * pick a DNAT ip address from a group.
5383 * 2. If there are L4 ports in load balancing rules, we
5384 * need the defragmentation to match on L4 ports. */
5385 ds_clear(&match);
5386 if (addr_family == AF_INET) {
5387 ds_put_format(&match, "ip && ip4.dst == %s",
5388 ip_address);
5389 } else {
5390 ds_put_format(&match, "ip && ip6.dst == %s",
5391 ip_address);
5392 }
5393 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
5394 100, ds_cstr(&match), "ct_next;");
8697d426
MS
5395 }
5396
5397 /* Higher priority rules are added for load-balancing in DNAT
5398 * table. For every match (on a VIP[:port]), we add two flows
5399 * via add_router_lb_flow(). One flow is for specific matching
5400 * on ct.new with an action of "ct_lb($targets);". The other
5401 * flow is for ct.est with an action of "ct_dnat;". */
5402 ds_clear(&actions);
5403 ds_put_format(&actions, "ct_lb(%s);", node->value);
5404
5405 ds_clear(&match);
485d373b
MM
5406 if (addr_family == AF_INET) {
5407 ds_put_format(&match, "ip && ip4.dst == %s",
5408 ip_address);
5409 } else {
5410 ds_put_format(&match, "ip && ip6.dst == %s",
5411 ip_address);
5412 }
8697d426
MS
5413 free(ip_address);
5414
6f39e18d
NS
5415 int prio = 110;
5416 bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ?
5417 true : false;
8697d426 5418 if (port) {
6f39e18d 5419 if (is_udp) {
8697d426
MS
5420 ds_put_format(&match, " && udp && udp.dst == %d",
5421 port);
5422 } else {
5423 ds_put_format(&match, " && tcp && tcp.dst == %d",
5424 port);
5425 }
6f39e18d
NS
5426 prio = 120;
5427 }
5428
5429 if (od->l3redirect_port) {
5430 ds_put_format(&match, " && is_chassis_resident(%s)",
5431 od->l3redirect_port->json_key);
8697d426 5432 }
6f39e18d 5433 add_router_lb_flow(lflows, od, &match, &actions, prio,
485d373b
MM
5434 lb_force_snat_ip, node->value, is_udp,
5435 addr_family);
8697d426
MS
5436 }
5437 }
8697d426 5438 sset_destroy(&all_ips);
de297547
GS
5439 }
5440
4364646c
ZKL
5441 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
5442 * response. */
5443 HMAP_FOR_EACH (op, key_node, ports) {
5444 if (!op->nbrp || op->nbrp->peer || !op->peer) {
5445 continue;
5446 }
5447
5448 if (!op->lrp_networks.n_ipv6_addrs) {
5449 continue;
5450 }
5451
5452 const char *address_mode = smap_get(
5453 &op->nbrp->ipv6_ra_configs, "address_mode");
5454
5455 if (!address_mode) {
5456 continue;
5457 }
5458 if (strcmp(address_mode, "slaac") &&
5459 strcmp(address_mode, "dhcpv6_stateful") &&
5460 strcmp(address_mode, "dhcpv6_stateless")) {
5461 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
5462 VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
5463 address_mode);
5464 continue;
5465 }
5466
5467 ds_clear(&match);
5468 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
5469 op->json_key);
5470 ds_clear(&actions);
5471
5472 const char *mtu_s = smap_get(
5473 &op->nbrp->ipv6_ra_configs, "mtu");
5474
5475 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
5476 uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
5477
5478 ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
5479 "addr_mode = \"%s\", slla = %s",
5480 address_mode, op->lrp_networks.ea_s);
5481 if (mtu > 0) {
5482 ds_put_format(&actions, ", mtu = %u", mtu);
5483 }
5484
5485 bool add_rs_response_flow = false;
5486
5487 for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5488 if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
5489 continue;
5490 }
5491
5492 /* Add the prefix option if the address mode is slaac or
5493 * dhcpv6_stateless. */
5494 if (strcmp(address_mode, "dhcpv6_stateful")) {
5495 ds_put_format(&actions, ", prefix = %s/%u",
5496 op->lrp_networks.ipv6_addrs[i].network_s,
5497 op->lrp_networks.ipv6_addrs[i].plen);
5498 }
5499 add_rs_response_flow = true;
5500 }
5501
5502 if (add_rs_response_flow) {
5503 ds_put_cstr(&actions, "); next;");
5504 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50,
5505 ds_cstr(&match), ds_cstr(&actions));
5506 ds_clear(&actions);
5507 ds_clear(&match);
5508 ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && "
5509 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
5510
5511 char ip6_str[INET6_ADDRSTRLEN + 1];
5512 struct in6_addr lla;
5513 in6_generate_lla(op->lrp_networks.ea, &lla);
5514 memset(ip6_str, 0, sizeof(ip6_str));
5515 ipv6_string_mapped(ip6_str, &lla);
5516 ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; "
5517 "ip6.dst = ip6.src; ip6.src = %s; "
5518 "outport = inport; flags.loopback = 1; "
5519 "output;",
5520 op->lrp_networks.ea_s, ip6_str);
5521 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50,
5522 ds_cstr(&match), ds_cstr(&actions));
5523 }
5524 }
5525
5526 /* Logical router ingress table 5, 6: RS responder, by default goto next.
5527 * (priority 0)*/
5528 HMAP_FOR_EACH (od, key_node, datapaths) {
5529 if (!od->nbr) {
5530 continue;
5531 }
5532
5533 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
5534 ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
5535 }
5536
5537 /* Logical router ingress table 7: IP Routing.
9975d7be
BP
5538 *
5539 * A packet that arrives at this table is an IP packet that should be
6fdb7cd6
JP
5540 * routed to the address in 'ip[46].dst'. This table sets outport to
5541 * the correct output port, eth.src to the output port's MAC
5542 * address, and '[xx]reg0' to the next-hop IP address (leaving
5543 * 'ip[46].dst', the packet’s final destination, unchanged), and
5544 * advances to the next table for ARP/ND resolution. */
9975d7be 5545 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 5546 if (!op->nbrp) {
9975d7be
BP
5547 continue;
5548 }
5549
4685e523
JP
5550 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
5551 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
5552 op->lrp_networks.ipv4_addrs[i].network_s,
440a9f4b 5553 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
4685e523 5554 }
6fdb7cd6
JP
5555
5556 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
5557 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
5558 op->lrp_networks.ipv6_addrs[i].network_s,
440a9f4b 5559 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6fdb7cd6 5560 }
9975d7be 5561 }
4685e523 5562
6fdb7cd6 5563 /* Convert the static routes to flows. */
9975d7be
BP
5564 HMAP_FOR_EACH (od, key_node, datapaths) {
5565 if (!od->nbr) {
5566 continue;
5567 }
5568
28dc3fe9
SR
5569 for (int i = 0; i < od->nbr->n_static_routes; i++) {
5570 const struct nbrec_logical_router_static_route *route;
5571
5572 route = od->nbr->static_routes[i];
5573 build_static_route_flow(lflows, od, ports, route);
5574 }
9975d7be 5575 }
6fdb7cd6 5576
9975d7be
BP
5577 /* XXX destination unreachable */
5578
4364646c 5579 /* Local router ingress table 8: ARP Resolution.
9975d7be
BP
5580 *
5581 * Any packet that reaches this table is an IP packet whose next-hop IP
5582 * address is in reg0. (ip4.dst is the final destination.) This table
5583 * resolves the IP address in reg0 into an output port in outport and an
5584 * Ethernet address in eth.dst. */
5585 HMAP_FOR_EACH (op, key_node, ports) {
7ebfcd3d
NS
5586 if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
5587 continue;
5588 }
5589
0ee00741 5590 if (op->nbrp) {
6fdb7cd6
JP
5591 /* This is a logical router port. If next-hop IP address in
5592 * '[xx]reg0' matches IP address of this router port, then
5593 * the packet is intended to eventually be sent to this
5594 * logical port. Set the destination mac address using this
5595 * port's mac address.
509afdc3
GS
5596 *
5597 * The packet is still in peer's logical pipeline. So the match
5598 * should be on peer's outport. */
6fdb7cd6
JP
5599 if (op->peer && op->nbrp->peer) {
5600 if (op->lrp_networks.n_ipv4_addrs) {
5601 ds_clear(&match);
5602 ds_put_format(&match, "outport == %s && reg0 == ",
5603 op->peer->json_key);
5604 op_put_v4_networks(&match, op, false);
5605
5606 ds_clear(&actions);
5607 ds_put_format(&actions, "eth.dst = %s; next;",
5608 op->lrp_networks.ea_s);
5609 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
5610 100, ds_cstr(&match), ds_cstr(&actions));
5611 }
4685e523 5612
6fdb7cd6
JP
5613 if (op->lrp_networks.n_ipv6_addrs) {
5614 ds_clear(&match);
5615 ds_put_format(&match, "outport == %s && xxreg0 == ",
5616 op->peer->json_key);
5617 op_put_v6_networks(&match, op);
5618
5619 ds_clear(&actions);
5620 ds_put_format(&actions, "eth.dst = %s; next;",
5621 op->lrp_networks.ea_s);
5622 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
5623 100, ds_cstr(&match), ds_cstr(&actions));
5624 }
509afdc3 5625 }
0ee00741 5626 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
5627 /* This is a logical switch port that backs a VM or a container.
5628 * Extract its addresses. For each of the address, go through all
5629 * the router ports attached to the switch (to which this port
5630 * connects) and if the address in question is reachable from the
6fdb7cd6 5631 * router port, add an ARP/ND entry in that router's pipeline. */
75cf9d2b 5632
e93b43d6 5633 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 5634 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 5635 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 5636 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 5637 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
5638 /* Get the Logical_Router_Port that the
5639 * Logical_Switch_Port is connected to, as
5640 * 'peer'. */
86e98048 5641 const char *peer_name = smap_get(
0ee00741 5642 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
5643 "router-port");
5644 if (!peer_name) {
5645 continue;
5646 }
5647
e93b43d6 5648 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 5649 if (!peer || !peer->nbrp) {
86e98048
BP
5650 continue;
5651 }
5652
4685e523 5653 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
5654 continue;
5655 }
5656
09b39248 5657 ds_clear(&match);
e93b43d6 5658 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
5659 peer->json_key, ip_s);
5660
09b39248 5661 ds_clear(&actions);
4685e523 5662 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 5663 ovn_lflow_add(lflows, peer->od,
09b39248
JP
5664 S_ROUTER_IN_ARP_RESOLVE, 100,
5665 ds_cstr(&match), ds_cstr(&actions));
86e98048 5666 }
9975d7be 5667 }
6fdb7cd6
JP
5668
5669 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
5670 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
5671 for (size_t k = 0; k < op->od->n_router_ports; k++) {
5672 /* Get the Logical_Router_Port that the
5673 * Logical_Switch_Port is connected to, as
5674 * 'peer'. */
5675 const char *peer_name = smap_get(
5676 &op->od->router_ports[k]->nbsp->options,
5677 "router-port");
5678 if (!peer_name) {
5679 continue;
5680 }
5681
5682 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5683 if (!peer || !peer->nbrp) {
5684 continue;
5685 }
5686
5687 if (!find_lrp_member_ip(peer, ip_s)) {
5688 continue;
5689 }
5690
5691 ds_clear(&match);
5692 ds_put_format(&match, "outport == %s && xxreg0 == %s",
5693 peer->json_key, ip_s);
5694
5695 ds_clear(&actions);
5696 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
5697 ovn_lflow_add(lflows, peer->od,
5698 S_ROUTER_IN_ARP_RESOLVE, 100,
5699 ds_cstr(&match), ds_cstr(&actions));
5700 }
5701 }
9975d7be 5702 }
0ee00741 5703 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
5704 /* This is a logical switch port that connects to a router. */
5705
5706 /* The peer of this switch port is the router port for which
5707 * we need to add logical flows such that it can resolve
5708 * ARP entries for all the other router ports connected to
5709 * the switch in question. */
5710
0ee00741 5711 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
5712 "router-port");
5713 if (!peer_name) {
5714 continue;
5715 }
5716
5717 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 5718 if (!peer || !peer->nbrp) {
75cf9d2b
GS
5719 continue;
5720 }
5721
4685e523 5722 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 5723 const char *router_port_name = smap_get(
0ee00741 5724 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
5725 "router-port");
5726 struct ovn_port *router_port = ovn_port_find(ports,
5727 router_port_name);
0ee00741 5728 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
5729 continue;
5730 }
5731
5732 /* Skip the router port under consideration. */
5733 if (router_port == peer) {
5734 continue;
5735 }
5736
6fdb7cd6
JP
5737 if (router_port->lrp_networks.n_ipv4_addrs) {
5738 ds_clear(&match);
5739 ds_put_format(&match, "outport == %s && reg0 == ",
5740 peer->json_key);
5741 op_put_v4_networks(&match, router_port, false);
5742
5743 ds_clear(&actions);
5744 ds_put_format(&actions, "eth.dst = %s; next;",
5745 router_port->lrp_networks.ea_s);
5746 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5747 100, ds_cstr(&match), ds_cstr(&actions));
5748 }
4685e523 5749
6fdb7cd6
JP
5750 if (router_port->lrp_networks.n_ipv6_addrs) {
5751 ds_clear(&match);
5752 ds_put_format(&match, "outport == %s && xxreg0 == ",
5753 peer->json_key);
5754 op_put_v6_networks(&match, router_port);
5755
5756 ds_clear(&actions);
5757 ds_put_format(&actions, "eth.dst = %s; next;",
5758 router_port->lrp_networks.ea_s);
5759 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5760 100, ds_cstr(&match), ds_cstr(&actions));
5761 }
75cf9d2b 5762 }
9975d7be
BP
5763 }
5764 }
75cf9d2b 5765
0bac7164
BP
5766 HMAP_FOR_EACH (od, key_node, datapaths) {
5767 if (!od->nbr) {
5768 continue;
5769 }
5770
c34a87b6
JP
5771 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
5772 "get_arp(outport, reg0); next;");
5773
5774 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
5775 "get_nd(outport, xxreg0); next;");
0bac7164
BP
5776 }
5777
4364646c 5778 /* Logical router ingress table 9: Gateway redirect.
41a15b71
MS
5779 *
5780 * For traffic with outport equal to the l3dgw_port
5781 * on a distributed router, this table redirects a subset
5782 * of the traffic to the l3redirect_port which represents
5783 * the central instance of the l3dgw_port.
5784 */
5785 HMAP_FOR_EACH (od, key_node, datapaths) {
5786 if (!od->nbr) {
5787 continue;
5788 }
5789 if (od->l3dgw_port && od->l3redirect_port) {
5790 /* For traffic with outport == l3dgw_port, if the
5791 * packet did not match any higher priority redirect
5792 * rule, then the traffic is redirected to the central
5793 * instance of the l3dgw_port. */
5794 ds_clear(&match);
5795 ds_put_format(&match, "outport == %s",
5796 od->l3dgw_port->json_key);
5797 ds_clear(&actions);
5798 ds_put_format(&actions, "outport = %s; next;",
5799 od->l3redirect_port->json_key);
5800 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
5801 ds_cstr(&match), ds_cstr(&actions));
5802
5803 /* If the Ethernet destination has not been resolved,
5804 * redirect to the central instance of the l3dgw_port.
5805 * Such traffic will be replaced by an ARP request or ND
5806 * Neighbor Solicitation in the ARP request ingress
5807 * table, before being redirected to the central instance.
5808 */
5809 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
5810 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
5811 ds_cstr(&match), ds_cstr(&actions));
5812 }
5813
5814 /* Packets are allowed by default. */
5815 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
5816 }
5817
4364646c 5818 /* Local router ingress table 10: ARP request.
0bac7164
BP
5819 *
5820 * In the common case where the Ethernet destination has been resolved,
94300e09 5821 * this table outputs the packet (priority 0). Otherwise, it composes
b1a3a6a4 5822 * and sends an ARP/IPv6 NA request (priority 100). */
0bac7164
BP
5823 HMAP_FOR_EACH (od, key_node, datapaths) {
5824 if (!od->nbr) {
5825 continue;
5826 }
5827
5828 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
5829 "eth.dst == 00:00:00:00:00:00",
5830 "arp { "
5831 "eth.dst = ff:ff:ff:ff:ff:ff; "
5832 "arp.spa = reg1; "
47021598 5833 "arp.tpa = reg0; "
0bac7164
BP
5834 "arp.op = 1; " /* ARP request */
5835 "output; "
5836 "};");
b1a3a6a4
NS
5837 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
5838 "eth.dst == 00:00:00:00:00:00",
5839 "nd_ns { "
5840 "nd.target = xxreg0; "
5841 "output; "
5842 "};");
0bac7164
BP
5843 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
5844 }
9975d7be 5845
de297547 5846 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
5847 *
5848 * Priority 100 rules deliver packets to enabled logical ports. */
5849 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 5850 if (!op->nbrp) {
9975d7be
BP
5851 continue;
5852 }
5853
0ee00741 5854 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
5855 /* Drop packets to disabled logical ports (since logical flow
5856 * tables are default-drop). */
5857 continue;
5858 }
5859
41a15b71
MS
5860 if (op->derived) {
5861 /* No egress packets should be processed in the context of
5862 * a chassisredirect port. The chassisredirect port should
5863 * be replaced by the l3dgw port in the local output
5864 * pipeline stage before egress processing. */
5865 continue;
5866 }
5867
09b39248
JP
5868 ds_clear(&match);
5869 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 5870 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 5871 ds_cstr(&match), "output;");
9975d7be 5872 }
09b39248
JP
5873
5874 ds_destroy(&match);
5875 ds_destroy(&actions);
9975d7be
BP
5876}
5877
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database.
 *
 * Strategy for both tables: first compute the desired contents in memory
 * ('lflows' and 'mcgroups'), then reconcile against what is already in the
 * southbound database.  Existing SB rows that match a desired entry are kept
 * (and the in-memory entry is dropped); unmatched SB rows are deleted; any
 * in-memory entries left over afterward are inserted as new SB rows. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    /* Compute the full desired logical flow set for switches and routers. */
    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            /* The datapath this flow belonged to no longer exists. */
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        /* Rebuild the stage key from the SB row's pipeline/table so it can
         * be looked up in the freshly computed 'lflows'. */
        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            /* SB row already matches a desired flow: keep the row, drop the
             * in-memory copy so it is not re-inserted below. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }

    /* Whatever remains in 'lflows' has no SB row yet: insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Trim the source locator lflow->where, which looks something like
         * "ovn/northd/ovn-northd.c:1234", down to just the part following the
         * last slash, e.g. "ovn-northd.c:1234". */
        const char *slash = strrchr(lflow->where, '/');
#if _WIN32
        const char *backslash = strrchr(lflow->where, '\\');
        if (!slash || backslash > slash) {
            slash = backslash;
        }
#endif
        const char *where = slash ? slash + 1 : lflow->where;

        /* Debugging aids stored in external_ids: which northd stage produced
         * the flow, where in this source file, and (optionally) a hint tying
         * it back to the NB row that caused it. */
        struct smap ids = SMAP_INITIALIZER(&ids);
        smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage));
        smap_add(&ids, "source", where);
        if (lflow->stage_hint) {
            smap_add(&ids, "stage-hint", lflow->stage_hint);
        }
        sbrec_logical_flow_set_external_ids(sbflow, &ids);
        smap_destroy(&ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database.
     * Same reconcile pattern as for Logical_Flow above. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Existing SB group: refresh its port set in place. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
ea382567
RB
5982
5983/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
5984 * We always update OVN_Southbound to match the current data in
5985 * OVN_Northbound, so that the address sets used in Logical_Flows in
5986 * OVN_Southbound is checked against the proper set.*/
5987static void
5988sync_address_sets(struct northd_context *ctx)
5989{
5990 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
5991
5992 const struct sbrec_address_set *sb_address_set;
5993 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
5994 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
5995 }
5996
5997 const struct nbrec_address_set *nb_address_set;
5998 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
5999 sb_address_set = shash_find_and_delete(&sb_address_sets,
6000 nb_address_set->name);
6001 if (!sb_address_set) {
6002 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
6003 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
6004 }
6005
6006 sbrec_address_set_set_addresses(sb_address_set,
6007 /* "char **" is not compatible with "const char **" */
6008 (const char **) nb_address_set->addresses,
6009 nb_address_set->n_addresses);
6010 }
6011
6012 struct shash_node *node, *next;
6013 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
6014 sbrec_address_set_delete(node->data);
6015 shash_delete(&sb_address_sets, node);
6016 }
6017 shash_destroy(&sb_address_sets);
6018}
302eda27
NS
6019
/*
 * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
 * and Southbound db.
 */
struct dns_info {
    struct hmap_node hmap_node;
    const struct nbrec_dns *nb_dns;  /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns;  /* DNS record in the Southbound db. */

    /* Datapaths with which the DNS entry is associated.  'sbs' is a
     * heap-allocated array of 'n_sbs' elements, owned by this struct. */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;
};
6033
6034static inline struct dns_info *
6035get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
6036{
6037 struct dns_info *dns_info;
6038 size_t hash = uuid_hash(uuid);
6039 HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
6040 if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
6041 return dns_info;
6042 }
6043 }
6044
6045 return NULL;
6046}
6047
6048static void
6049sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths)
6050{
6051 struct hmap dns_map = HMAP_INITIALIZER(&dns_map);
6052 struct ovn_datapath *od;
6053 HMAP_FOR_EACH (od, key_node, datapaths) {
6054 if (!od->nbs || !od->nbs->n_dns_records) {
6055 continue;
6056 }
6057
6058 for (size_t i = 0; i < od->nbs->n_dns_records; i++) {
6059 struct dns_info *dns_info = get_dns_info_from_hmap(
6060 &dns_map, &od->nbs->dns_records[i]->header_.uuid);
6061 if (!dns_info) {
6062 size_t hash = uuid_hash(
6063 &od->nbs->dns_records[i]->header_.uuid);
6064 dns_info = xzalloc(sizeof *dns_info);;
6065 dns_info->nb_dns = od->nbs->dns_records[i];
6066 hmap_insert(&dns_map, &dns_info->hmap_node, hash);
6067 }
6068
6069 dns_info->n_sbs++;
6070 dns_info->sbs = xrealloc(dns_info->sbs,
6071 dns_info->n_sbs * sizeof *dns_info->sbs);
6072 dns_info->sbs[dns_info->n_sbs - 1] = od->sb;
6073 }
6074 }
6075
6076 const struct sbrec_dns *sbrec_dns, *next;
6077 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) {
6078 const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id");
6079 struct uuid dns_uuid;
6080 if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) {
6081 sbrec_dns_delete(sbrec_dns);
6082 continue;
6083 }
6084
6085 struct dns_info *dns_info =
6086 get_dns_info_from_hmap(&dns_map, &dns_uuid);
6087 if (dns_info) {
6088 dns_info->sb_dns = sbrec_dns;
6089 } else {
6090 sbrec_dns_delete(sbrec_dns);
6091 }
6092 }
6093
6094 struct dns_info *dns_info;
6095 HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) {
6096 if (!dns_info->sb_dns) {
71f21279 6097 sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn);
302eda27
NS
6098 dns_info->sb_dns = sbrec_dns;
6099 char *dns_id = xasprintf(
6100 UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid));
6101 const struct smap external_ids =
6102 SMAP_CONST1(&external_ids, "dns_id", dns_id);
6103 sbrec_dns_set_external_ids(sbrec_dns, &external_ids);
6104 free(dns_id);
6105 }
6106
6107 /* Set the datapaths and records. If nothing has changed, then
6108 * this will be a no-op.
6109 */
6110 sbrec_dns_set_datapaths(
6111 dns_info->sb_dns,
6112 (struct sbrec_datapath_binding **)dns_info->sbs,
6113 dns_info->n_sbs);
6114 sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records);
6115 free(dns_info->sbs);
6116 free(dns_info);
6117 }
6118 hmap_destroy(&dns_map);
6119}
6120
5868eb24 6121\f
/* One pass of the main northd translation: reads the OVN_Northbound
 * contents and rewrites the OVN_Southbound database to match.
 *
 * Requires open transactions on both databases; returns immediately
 * (doing nothing) if either transaction is unavailable this iteration. */
static void
ovnnb_db_run(struct northd_context *ctx, struct chassis_index *chassis_index,
             struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }

    /* Build the in-memory model (datapaths, ports, IPAM assignments) and
     * translate it into southbound logical flows.  Order matters: ports
     * reference datapaths, IPAM needs both, lflows need all three. */
    struct hmap datapaths, ports;
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, &datapaths, chassis_index, &ports);
    build_ipam(&datapaths, &ports);
    build_lflows(ctx, &datapaths, &ports);

    /* Mirror the NB Address_Set and DNS tables into the SB database. */
    sync_address_sets(ctx);
    sync_dns_entries(ctx, &datapaths);

    /* Tear down the in-memory model; everything needed has been written
     * into the southbound transaction. */
    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        /* Create the singleton NB_Global row if the NB db lacks one. */
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        /* Likewise for the singleton SB_Global row. */
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    /* Release the per-run MAC address allocation state ('macam' is a
     * file-scope hmap populated by build_ipam). */
    cleanup_macam(&macam);
}
6166
fa183acc
BP
6167/* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
6168 * this column is not empty, it means we need to set the corresponding logical
6169 * port as 'up' in the northbound DB. */
ac0630a2 6170static void
fa183acc 6171update_logical_port_status(struct northd_context *ctx)
ac0630a2 6172{
fc3113bc 6173 struct hmap lports_hmap;
5868eb24 6174 const struct sbrec_port_binding *sb;
0ee00741 6175 const struct nbrec_logical_switch_port *nbsp;
fc3113bc
RB
6176
6177 struct lport_hash_node {
6178 struct hmap_node node;
0ee00741 6179 const struct nbrec_logical_switch_port *nbsp;
4ec3d7c7 6180 } *hash_node;
f93818dd 6181
fc3113bc 6182 hmap_init(&lports_hmap);
f93818dd 6183
0ee00741 6184 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
fc3113bc 6185 hash_node = xzalloc(sizeof *hash_node);
0ee00741
HK
6186 hash_node->nbsp = nbsp;
6187 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
fc3113bc
RB
6188 }
6189
5868eb24 6190 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
0ee00741 6191 nbsp = NULL;
fc3113bc 6192 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
6193 hash_string(sb->logical_port, 0),
6194 &lports_hmap) {
0ee00741
HK
6195 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
6196 nbsp = hash_node->nbsp;
fc3113bc
RB
6197 break;
6198 }
f93818dd
RB
6199 }
6200
0ee00741 6201 if (!nbsp) {
dcda6e0d 6202 /* The logical port doesn't exist for this port binding. This can
2e2762d4 6203 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 6204 * around to pruning the Port_Binding yet. */
f93818dd
RB
6205 continue;
6206 }
6207
0ee00741 6208 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
f93818dd 6209 bool up = true;
0ee00741
HK
6210 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
6211 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
f93818dd 6212 bool up = false;
0ee00741 6213 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
f93818dd
RB
6214 }
6215 }
fc3113bc 6216
4ec3d7c7 6217 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
6218 free(hash_node);
6219 }
6220 hmap_destroy(&lports_hmap);
ac0630a2 6221}
45f98d4c 6222
/* DHCPv4 options that ovn-northd advertises in the southbound DHCP_Options
 * table.  Each entry is a (name, code, type) triple defined in ovn-l7.h;
 * OFFERIP is the pseudo-option carrying the offered address itself. */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
6249
/* DHCPv6 options that ovn-northd advertises in the southbound
 * DHCPv6_Options table; entries defined in ovn-l7.h. */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
6256
281977f7
NS
6257static void
6258check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
6259{
6260 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
6261 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
6262 sizeof(supported_dhcp_opts[0])); i++) {
6263 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
6264 dhcp_opt_hash(supported_dhcp_opts[i].name));
6265 }
6266
6267 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
6268 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
16936e4d 6269 struct gen_opts_map *dhcp_opt =
281977f7
NS
6270 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
6271 if (dhcp_opt) {
6272 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
6273 } else {
6274 sbrec_dhcp_options_delete(opt_row);
6275 }
6276 }
6277
16936e4d 6278 struct gen_opts_map *opt;
281977f7
NS
6279 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
6280 struct sbrec_dhcp_options *sbrec_dhcp_option =
6281 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
6282 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
6283 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
6284 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
6285 }
6286
6287 hmap_destroy(&dhcp_opts_to_add);
6288}
6289
33ac3c83
NS
6290static void
6291check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
6292{
6293 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
6294 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
6295 sizeof(supported_dhcpv6_opts[0])); i++) {
6296 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
6297 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
6298 }
6299
6300 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
6301 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
16936e4d 6302 struct gen_opts_map *dhcp_opt =
33ac3c83
NS
6303 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
6304 if (dhcp_opt) {
6305 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
6306 } else {
6307 sbrec_dhcpv6_options_delete(opt_row);
6308 }
6309 }
6310
16936e4d 6311 struct gen_opts_map *opt;
33ac3c83
NS
6312 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
6313 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
6314 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
6315 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
6316 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
6317 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
6318 }
6319
6320 hmap_destroy(&dhcpv6_opts_to_add);
6321}
6322
75ddb5f4
LR
/* Expected RBAC configuration for the "ovn-controller" role: for each
 * southbound table, the columns that authorize a client (auth), whether
 * rows may be inserted/deleted, and the columns a client may update.
 * check_and_update_rbac() enforces exactly this state in the SB database. */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

/* Empty authorization string means any client may touch the row, but
 * Port_Binding rows may only have their "chassis" column updated and may
 * not be inserted or deleted (insdel = false below). */
static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};

/* One entry per RBAC-controlled table; the table == NULL entry is the
 * terminator for loops over this array.  'row' caches the matching
 * SB RBAC_Permission row found during validation (NULL if absent). */
static struct rbac_perm_cfg {
    const char *table;       /* SB table name this permission covers. */
    const char **auth;       /* Columns used to authorize the client. */
    int n_auth;
    bool insdel;             /* May the client insert/delete rows? */
    const char **update;     /* Columns the client may update. */
    int n_update;
    const struct sbrec_rbac_permission *row;
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        .table = NULL,       /* Array terminator. */
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
6394
6395static bool
6396ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
6397{
6398 struct rbac_perm_cfg *pcfg;
6399 int i, j, n_found;
6400
6401 for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
6402 if (!strcmp(perm->table, pcfg->table)) {
6403 break;
6404 }
6405 }
6406 if (!pcfg->table) {
6407 return false;
6408 }
6409 if (perm->n_authorization != pcfg->n_auth ||
6410 perm->n_update != pcfg->n_update) {
6411 return false;
6412 }
6413 if (perm->insert_delete != pcfg->insdel) {
6414 return false;
6415 }
6416 /* verify perm->authorization vs. pcfg->auth */
6417 n_found = 0;
6418 for (i = 0; i < pcfg->n_auth; i++) {
6419 for (j = 0; j < perm->n_authorization; j++) {
6420 if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
6421 n_found++;
6422 break;
6423 }
6424 }
6425 }
6426 if (n_found != pcfg->n_auth) {
6427 return false;
6428 }
6429
6430 /* verify perm->update vs. pcfg->update */
6431 n_found = 0;
6432 for (i = 0; i < pcfg->n_update; i++) {
6433 for (j = 0; j < perm->n_update; j++) {
6434 if (!strcmp(pcfg->update[i], perm->update[j])) {
6435 n_found++;
6436 break;
6437 }
6438 }
6439 }
6440 if (n_found != pcfg->n_update) {
6441 return false;
6442 }
6443
6444 /* Success, db state matches expected state */
6445 pcfg->row = perm;
6446 return true;
6447}
6448
6449static void
6450ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
6451 struct northd_context *ctx,
6452 const struct sbrec_rbac_role *rbac_role)
6453{
6454 struct sbrec_rbac_permission *rbac_perm;
6455
6456 rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
6457 sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
6458 sbrec_rbac_permission_set_authorization(rbac_perm,
6459 pcfg->auth,
6460 pcfg->n_auth);
6461 sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
6462 sbrec_rbac_permission_set_update(rbac_perm,
6463 pcfg->update,
6464 pcfg->n_update);
6465 sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
6466 rbac_perm);
6467}
6468
/* Forces the southbound RBAC tables into the exact state described by
 * rbac_perm_cfg[]: deletes any permission row that does not match, deletes
 * any role other than "ovn-controller", then (re)creates the role and any
 * missing permission rows. */
static void
check_and_update_rbac(struct northd_context *ctx)
{
    const struct sbrec_rbac_role *rbac_role = NULL;
    const struct sbrec_rbac_permission *perm_row, *perm_next;
    const struct sbrec_rbac_role *role_row, *role_row_next;
    struct rbac_perm_cfg *pcfg;

    /* Reset the per-entry row cache; ovn_rbac_validate_perm() repopulates
     * it for rows that already match. */
    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        pcfg->row = NULL;
    }

    SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
        if (!ovn_rbac_validate_perm(perm_row)) {
            sbrec_rbac_permission_delete(perm_row);
        }
    }
    SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
        if (strcmp(role_row->name, "ovn-controller")) {
            sbrec_rbac_role_delete(role_row);
        } else {
            rbac_role = role_row;
        }
    }

    if (!rbac_role) {
        rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
        sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
    }

    /* Recreate any permission row that was missing or just deleted. */
    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        if (!pcfg->row) {
            ovn_rbac_create_perm(pcfg, ctx, rbac_role);
        }
    }
}
6505
fa183acc
BP
6506/* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
6507static void
6508update_northbound_cfg(struct northd_context *ctx,
6509 struct ovsdb_idl_loop *sb_loop)
6510{
6511 /* Update northbound sb_cfg if appropriate. */
6512 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
6513 int64_t sb_cfg = sb_loop->cur_cfg;
6514 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
6515 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
6516 }
6517
6518 /* Update northbound hv_cfg if appropriate. */
6519 if (nbg) {
6520 /* Find minimum nb_cfg among all chassis. */
6521 const struct sbrec_chassis *chassis;
6522 int64_t hv_cfg = nbg->nb_cfg;
6523 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
6524 if (chassis->nb_cfg < hv_cfg) {
6525 hv_cfg = chassis->nb_cfg;
6526 }
6527 }
6528
6529 /* Update hv_cfg. */
6530 if (nbg->hv_cfg != hv_cfg) {
6531 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
6532 }
6533 }
6534}
6535
/* Handle a fairly small set of changes in the southbound database. */
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    /* Skip until we have an NB transaction to write into and the SB
     * connection has synced at least once. */
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}
6547\f
ac0630a2
RB
/* Parses ovn-northd's command-line options, setting the global ovnnb_db /
 * ovnsb_db connection strings and handling the daemon/vlog/SSL option
 * groups via their shared macros.  Exits directly for --help, --options
 * and --version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        /* Expand the enum values the shared option-group macros need. */
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Shared option groups handle their own cases (and may exit). */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the standard unix-socket locations when the database
     * endpoints were not given on the command line. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
6617
5868eb24
BP
/* Registers 'column' with 'idl' but suppresses change alerts for it: the
 * column is replicated (so we can read and write it) without waking the
 * main loop when only that column changes. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
6625
ac0630a2
RB
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    /* Unixctl server for the "exit" command; failure here is fatal. */
    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg/hv_cfg are written by us; don't wake up on our own writes. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db.
     * Every table/column northd reads or writes must be registered
     * explicitly below; columns northd only writes use add_column_noalert()
     * to avoid self-inflicted wakeups. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* 'chassis' is written by ovn-controller, so we DO want alerts here. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_port_binding_col_gateway_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_priority);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_external_ids);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_options);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_external_ids);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_table);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_authorization);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_insert_delete);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);

    /* Ensure that only a single ovn-northd is active in the deployment by
     * acquiring a lock called "ovn_northd" on the southbound database
     * and then only performing DB transactions if the lock is held. */
    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
    bool had_lock = false;

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        /* Log active/standby transitions exactly once per change. */
        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock acquired. "
                      "This ovn-northd instance is now active.");
            had_lock = true;
        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock lost. "
                      "This ovn-northd instance is now on standby.");
            had_lock = false;
        }

        /* Only the active (lock-holding) instance processes the DBs. */
        struct chassis_index chassis_index;
        bool destroy_chassis_index = false;
        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            chassis_index_init(&chassis_index, ctx.ovnsb_idl);
            destroy_chassis_index = true;

            ovnnb_db_run(&ctx, &chassis_index, &ovnsb_idl_loop);
            ovnsb_db_run(&ctx, &ovnsb_idl_loop);
            if (ctx.ovnsb_txn) {
                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
                check_and_update_rbac(&ctx);
            }
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Don't block in poll_block(); exit on the next iteration. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }

        if (destroy_chassis_index) {
            chassis_index_destroy(&chassis_index);
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9
AW
6820
/* unixctl "exit" handler: sets the main loop's 'exiting' flag (passed via
 * 'exiting_') and acknowledges the command. */
static void
ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
                const char *argv[] OVS_UNUSED, void *exiting_)
{
    bool *exiting = exiting_;
    *exiting = true;

    unixctl_command_reply(conn, NULL);
}