]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
ovn-northd ipam: Support 'exclude_ips' option
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
b511690b 21#include "bitmap.h"
ac0630a2 22#include "command-line.h"
67d9b930 23#include "daemon.h"
ac0630a2 24#include "dirs.h"
3e8a2ad1 25#include "openvswitch/dynamic-string.h"
ac0630a2 26#include "fatal-signal.h"
4edcdcf4 27#include "hash.h"
ee89ea7b
TW
28#include "openvswitch/hmap.h"
29#include "openvswitch/json.h"
8b2ed684 30#include "ovn/lex.h"
06a26dd2 31#include "ovn/lib/logical-fields.h"
281977f7 32#include "ovn/lib/ovn-dhcp.h"
e3df8838
BP
33#include "ovn/lib/ovn-nb-idl.h"
34#include "ovn/lib/ovn-sb-idl.h"
218351dd 35#include "ovn/lib/ovn-util.h"
a6095f81 36#include "ovn/actions.h"
064d7f84 37#include "packets.h"
ac0630a2 38#include "poll-loop.h"
5868eb24 39#include "smap.h"
7a15be69 40#include "sset.h"
ac0630a2
RB
41#include "stream.h"
42#include "stream-ssl.h"
7b303ff9 43#include "unixctl.h"
ac0630a2 44#include "util.h"
4edcdcf4 45#include "uuid.h"
ac0630a2
RB
46#include "openvswitch/vlog.h"
47
2e2762d4 48VLOG_DEFINE_THIS_MODULE(ovn_northd);
ac0630a2 49
7b303ff9
AW
50static unixctl_cb_func ovn_northd_exit;
51
/* Per-iteration context for ovn-northd: IDL handles for the OVN Northbound
 * and Southbound databases, plus the transactions currently open against
 * them (NULL when no transaction is in progress). */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* OVN Northbound DB connection. */
    struct ovsdb_idl *ovnsb_idl;        /* OVN Southbound DB connection. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Open NB transaction, if any. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Open SB transaction, if any. */
};
58
/* Connection targets for the northbound and southbound databases.
 * NOTE(review): presumably set during option parsing -- the code that
 * assigns them is outside this chunk; confirm. */
static const char *ovnnb_db;
static const char *ovnsb_db;
ac0630a2 61
8639f9be
ND
/* OVN-managed MACs all share the 0a:00:00 prefix; the low 24 bits form the
 * per-address suffix, giving MAC_ADDR_SPACE possible values. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);

#define MAX_OVN_TAGS 4096
880fcd14
BP
70\f
71/* Pipeline stages. */
ac0630a2 72
880fcd14
BP
/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};
091e3af9 78
880fcd14
BP
/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
84
880fcd14
BP
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))

/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * Encoding (see OVN_STAGE_BUILD): bit 9 is the datapath type, bit 8 the
 * pipeline, and the low 8 bits the table number. */
enum ovn_stage {
#define PIPELINE_STAGES                                                   \
    /* Logical switch ingress stages. */                                  \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")   \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")       \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")        \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")           \
    PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,       7, "ls_in_qos_mark")      \
    PIPELINE_STAGE(SWITCH, IN,  LB,             8, "ls_in_lb")            \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,       9, "ls_in_stateful")      \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,    10, "ls_in_arp_rsp")       \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  11, "ls_in_dhcp_options")  \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       13, "ls_in_l2_lkup")       \
                                                                          \
    /* Logical switch egress stages. */                                   \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,         0, "ls_out_pre_lb")       \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,        1, "ls_out_pre_acl")      \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL,   2, "ls_out_pre_stateful") \
    PIPELINE_STAGE(SWITCH, OUT, LB,             3, "ls_out_lb")           \
    PIPELINE_STAGE(SWITCH, OUT, ACL,            4, "ls_out_acl")          \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK,       5, "ls_out_qos_mark")     \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,       6, "ls_out_stateful")     \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,    7, "ls_out_port_sec_ip")  \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,    8, "ls_out_port_sec_l2")  \
                                                                          \
    /* Logical router ingress stages. */                                  \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,      0, "lr_in_admission")     \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,       1, "lr_in_ip_input")      \
    PIPELINE_STAGE(ROUTER, IN,  DEFRAG,         2, "lr_in_defrag")        \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,         3, "lr_in_unsnat")        \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,           4, "lr_in_dnat")          \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,     5, "lr_in_ip_routing")    \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,    6, "lr_in_arp_resolve")   \
    PIPELINE_STAGE(ROUTER, IN,  GW_REDIRECT,    7, "lr_in_gw_redirect")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,    8, "lr_in_arp_request")   \
                                                                          \
    /* Logical router egress stages. */                                   \
    PIPELINE_STAGE(ROUTER, OUT, UNDNAT,         0, "lr_out_undnat")       \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,           1, "lr_out_snat")         \
    PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP,       2, "lr_out_egr_loop")     \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,       3, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
151
6bb4a18e
JP
/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* Register definitions specific to switches. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT    "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"

/* Register definitions for switches and routers. */
#define REGBIT_NAT_REDIRECT     "reg9[0]"
/* Indicate that this packet has been recirculated using egress
 * loopback.  This allows certain checks to be bypassed, such as a
 * logical router dropping packets with source IP address equals
 * one of the logical router's own IP addresses. */
#define REGBIT_EGRESS_LOOPBACK  "reg9[1]"
171
880fcd14
BP
/* Returns an "enum ovn_stage" built from the arguments.  This is the
 * type-safe counterpart of the OVN_STAGE_BUILD() macro. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
                uint8_t table)
{
    return OVN_STAGE_BUILD(dp_type, pipeline, table);
}
179
180/* Returns the pipeline to which 'stage' belongs. */
181static enum ovn_pipeline
182ovn_stage_get_pipeline(enum ovn_stage stage)
183{
184 return (stage >> 8) & 1;
185}
186
187/* Returns the table to which 'stage' belongs. */
188static uint8_t
189ovn_stage_get_table(enum ovn_stage stage)
190{
191 return stage & 0xff;
192}
193
/* Returns a string name for 'stage' (e.g. "ls_in_acl"), or "<unknown>" if
 * 'stage' is not one of the values generated from PIPELINE_STAGES. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
9a9961d2
BP
206
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added.  Aborts if 'stage' is not a generated stage value. */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
    default: OVS_NOT_REACHED();
    }
}
880fcd14 220\f
ac0630a2
RB
/* Prints a usage/help message for ovn-northd to stdout.  (The caller decides
 * whether to exit afterward.) */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
241\f
5868eb24
BP
/* One allocated tunnel ID (tunnel key), an entry in a set of in-use IDs. */
struct tnlid_node {
    struct hmap_node hmap_node; /* In an hmap of allocated IDs, by 'tnlid'. */
    uint32_t tnlid;             /* The tunnel ID itself. */
};
246
247static void
248destroy_tnlids(struct hmap *tnlids)
4edcdcf4 249{
4ec3d7c7
DDP
250 struct tnlid_node *node;
251 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
252 free(node);
253 }
254 hmap_destroy(tnlids);
255}
256
257static void
258add_tnlid(struct hmap *set, uint32_t tnlid)
259{
260 struct tnlid_node *node = xmalloc(sizeof *node);
261 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
262 node->tnlid = tnlid;
4edcdcf4
RB
263}
264
4edcdcf4 265static bool
5868eb24 266tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 267{
5868eb24
BP
268 const struct tnlid_node *node;
269 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
270 if (node->tnlid == tnlid) {
271 return true;
272 }
273 }
274 return false;
275}
4edcdcf4 276
5868eb24
BP
277static uint32_t
278allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
279 uint32_t *hint)
280{
281 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
282 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
283 if (!tnlid_in_use(set, tnlid)) {
284 add_tnlid(set, tnlid);
285 *hint = tnlid;
286 return tnlid;
287 }
4edcdcf4
RB
288 }
289
5868eb24
BP
290 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
291 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
292 return 0;
293}
294\f
a6095f81
BS
/* One qdisc queue ID allocated on a particular chassis; entries are hashed
 * on the chassis UUID. */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* In an hmap of allocated queues. */
    uint32_t queue_id;          /* The allocated qdisc queue ID. */
    struct uuid chassis_uuid;   /* Chassis on which the queue lives. */
};
300
301static void
302destroy_chassis_queues(struct hmap *set)
303{
304 struct ovn_chassis_qdisc_queues *node;
305 HMAP_FOR_EACH_POP (node, key_node, set) {
306 free(node);
307 }
308 hmap_destroy(set);
309}
310
311static void
312add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
313 uint32_t queue_id)
314{
315 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
316 node->queue_id = queue_id;
317 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
318 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
319}
320
321static bool
322chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
323 uint32_t queue_id)
324{
325 const struct ovn_chassis_qdisc_queues *node;
326 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
327 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
328 && node->queue_id == queue_id) {
329 return true;
330 }
331 }
332 return false;
333}
334
335static uint32_t
336allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
337{
338 for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
339 queue_id <= QDISC_MAX_QUEUE_ID;
340 queue_id++) {
341 if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
342 add_chassis_queue(set, &chassis->header_.uuid, queue_id);
343 return queue_id;
344 }
345 }
346
347 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
348 VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
349 return 0;
350}
351
352static void
353free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
354 uint32_t queue_id)
355{
356 struct ovn_chassis_qdisc_queues *node;
357 HMAP_FOR_EACH_WITH_HASH (node, key_node,
358 uuid_hash(&chassis->header_.uuid),
359 set) {
360 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
361 && node->queue_id == queue_id) {
362 hmap_remove(set, &node->key_node);
363 break;
364 }
365 }
366}
367
/* Returns true if 'opts' sets either of the QoS options that require a
 * qdisc queue: "qos_max_rate" or "qos_burst". */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    return smap_get(opts, "qos_max_rate") != NULL
           || smap_get(opts, "qos_burst") != NULL;
}
374\f
161ea2c8
NS
375
/* Per-logical-switch IPv4 address-management state, built from the switch's
 * other_config:subnet (see init_ipam_info_for_datapath()). */
struct ipam_info {
    uint32_t start_ipv4;        /* First host address in the subnet. */
    size_t total_ipv4s;         /* Number of addresses covered by the mask. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
};
381
9975d7be
BP
/* A logical switch or logical router, correlated across both databases.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* Ports that peer with router ports. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;     /* Hint for allocating the next port key. */

    bool has_unknown;           /* Any port with an "unknown" address? */

    /* IPAM data.  NULL unless the switch has other_config:subnet. */
    struct ipam_info *ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
};
415
/* One MAC address allocated by the OVN ipam module; an entry in the global
 * 'macam' hmap, hashed on the address's uint64 form. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};
420
8639f9be
ND
421static void
422cleanup_macam(struct hmap *macam)
423{
424 struct macam_node *node;
425 HMAP_FOR_EACH_POP (node, hmap_node, macam) {
426 free(node);
427 }
428}
429
5868eb24
BP
430static struct ovn_datapath *
431ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
9975d7be
BP
432 const struct nbrec_logical_switch *nbs,
433 const struct nbrec_logical_router *nbr,
5868eb24
BP
434 const struct sbrec_datapath_binding *sb)
435{
436 struct ovn_datapath *od = xzalloc(sizeof *od);
437 od->key = *key;
438 od->sb = sb;
9975d7be
BP
439 od->nbs = nbs;
440 od->nbr = nbr;
5868eb24
BP
441 hmap_init(&od->port_tnlids);
442 od->port_key_hint = 0;
443 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
444 return od;
445}
446
447static void
448ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
449{
450 if (od) {
451 /* Don't remove od->list. It is used within build_datapaths() as a
452 * private list and once we've exited that function it is not safe to
453 * use it. */
454 hmap_remove(datapaths, &od->key_node);
455 destroy_tnlids(&od->port_tnlids);
161ea2c8
NS
456 if (od->ipam_info) {
457 bitmap_free(od->ipam_info->allocated_ipv4s);
458 free(od->ipam_info);
459 }
86e98048 460 free(od->router_ports);
5868eb24
BP
461 free(od);
462 }
463}
464
9a9961d2
BP
465/* Returns 'od''s datapath type. */
466static enum ovn_datapath_type
467ovn_datapath_get_type(const struct ovn_datapath *od)
468{
469 return od->nbs ? DP_SWITCH : DP_ROUTER;
470}
471
5868eb24
BP
472static struct ovn_datapath *
473ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
474{
475 struct ovn_datapath *od;
476
477 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
478 if (uuid_equals(uuid, &od->key)) {
479 return od;
480 }
481 }
482 return NULL;
483}
484
485static struct ovn_datapath *
486ovn_datapath_from_sbrec(struct hmap *datapaths,
487 const struct sbrec_datapath_binding *sb)
488{
489 struct uuid key;
490
9975d7be
BP
491 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
492 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
493 return NULL;
494 }
495 return ovn_datapath_find(datapaths, &key);
496}
497
5412db30
J
498static bool
499lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
500{
501 return !lrouter->enabled || *lrouter->enabled;
502}
503
161ea2c8
NS
/* Parses other_config:subnet on logical switch 'od' and, when present and
 * valid, allocates od->ipam_info: the first usable host address, the subnet
 * size, and a bitmap of allocated addresses with the network address
 * pre-marked as taken.  Addresses in other_config:exclude_ips (single IPs
 * or "A..B" ranges) are also pre-marked so IPAM never hands them out.
 * No-op for routers and for switches without a subnet. */
static void
init_ipam_info_for_datapath(struct ovn_datapath *od)
{
    if (!od->nbs) {
        return;                 /* IPAM applies only to logical switches. */
    }

    const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
    if (!subnet_str) {
        return;
    }

    ovs_be32 subnet, mask;
    char *error = ip_parse_masked(subnet_str, &subnet, &mask);
    /* Reject unparsable subnets, /32s, and noncontiguous masks. */
    if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl
            = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
        free(error);
        return;
    }

    od->ipam_info = xzalloc(sizeof *od->ipam_info);
    od->ipam_info->start_ipv4 = ntohl(subnet) + 1;
    od->ipam_info->total_ipv4s = ~ntohl(mask);
    od->ipam_info->allocated_ipv4s =
        bitmap_allocate(od->ipam_info->total_ipv4s);

    /* Mark first IP as taken */
    bitmap_set1(od->ipam_info->allocated_ipv4s, 0);

    /* Check if there are any reserved IPs (list) to be excluded from IPAM */
    const char *exclude_ip_list = smap_get(&od->nbs->other_config,
                                           "exclude_ips");
    if (!exclude_ip_list) {
        return;
    }

    struct lexer lexer;
    lexer_init(&lexer, exclude_ip_list);
    /* exclude_ip_list could be in the format -
     *  "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
     */
    lexer_get(&lexer);
    while (lexer.token.type != LEX_T_END) {
        if (lexer.token.type != LEX_T_INTEGER) {
            lexer_syntax_error(&lexer, "expecting address");
            break;
        }
        uint32_t start = ntohl(lexer.token.value.ipv4);
        lexer_get(&lexer);

        /* A bare address excludes exactly one IP; "start..end" excludes the
         * inclusive range, so 'end' below is exclusive. */
        uint32_t end = start + 1;
        if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
            if (lexer.token.type != LEX_T_INTEGER) {
                lexer_syntax_error(&lexer, "expecting address range");
                break;
            }
            end = ntohl(lexer.token.value.ipv4) + 1;
            lexer_get(&lexer);
        }

        /* Clamp start...end to fit the subnet. */
        start = MAX(od->ipam_info->start_ipv4, start);
        end = MIN(od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s, end);
        if (end > start) {
            bitmap_set_multiple(od->ipam_info->allocated_ipv4s,
                                start - od->ipam_info->start_ipv4,
                                end - start, 1);
        } else {
            lexer_error(&lexer, "excluded addresses not in subnet");
        }
    }
    if (lexer.error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
                     UUID_ARGS(&od->key), lexer.error);
    }
    lexer_destroy(&lexer);
}
584
5868eb24
BP
/* Correlates southbound Datapath_Binding rows with northbound logical
 * switches and routers, creating a "struct ovn_datapath" in 'datapaths' for
 * each one found in either database.  On return:
 *
 *   - 'sb_only' lists datapaths that have only a southbound row.
 *   - 'nb_only' lists datapaths that have only a northbound row.
 *   - 'both' lists datapaths present in both databases.
 *
 * Southbound rows with missing or duplicate external-ids keys are deleted.
 * IPAM state is initialized for every northbound logical switch. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every southbound row; initially assume SB-only. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            /* Row cannot be matched to the northbound DB; delete it. */
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: fold in northbound logical switches, moving matches from
     * 'sb_only' to 'both'. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        init_ipam_info_for_datapath(od);
    }

    /* Pass 3: fold in northbound logical routers the same way, skipping
     * disabled routers. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and a router share a UUID. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
670
/* Allocates a tunnel key for a new datapath (24-bit space; 0 is reserved). */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;       /* Persists across calls to spread keys. */
    const uint32_t max_key = (1u << 24) - 1;
    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
677
0bac7164
BP
678/* Updates the southbound Datapath_Binding table so that it contains the
679 * logical switches and routers specified by the northbound database.
680 *
681 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
682 * switch and router. */
5868eb24
BP
683static void
684build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
685{
686 struct ovs_list sb_only, nb_only, both;
687
688 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
689
417e7e66 690 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
691 /* First index the in-use datapath tunnel IDs. */
692 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
693 struct ovn_datapath *od;
694 LIST_FOR_EACH (od, list, &both) {
695 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
696 }
697
698 /* Add southbound record for each unmatched northbound record. */
699 LIST_FOR_EACH (od, list, &nb_only) {
700 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
701 if (!tunnel_key) {
702 break;
703 }
704
705 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
706
0f8e9c12
BP
707 /* Get the logical-switch or logical-router UUID to set in
708 * external-ids. */
5868eb24 709 char uuid_s[UUID_LEN + 1];
9975d7be
BP
710 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
711 const char *key = od->nbs ? "logical-switch" : "logical-router";
0f8e9c12
BP
712
713 /* Get name to set in external-ids. */
714 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
715
716 /* Set external-ids. */
717 struct smap ids = SMAP_INITIALIZER(&ids);
718 smap_add(&ids, key, uuid_s);
719 if (*name) {
720 smap_add(&ids, "name", name);
721 }
722 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
723 smap_destroy(&ids);
5868eb24
BP
724
725 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
726 }
727 destroy_tnlids(&dp_tnlids);
728 }
729
730 /* Delete southbound records without northbound matches. */
731 struct ovn_datapath *od, *next;
732 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 733 ovs_list_remove(&od->list);
5868eb24
BP
734 sbrec_datapath_binding_delete(od->sb);
735 ovn_datapath_destroy(datapaths, od);
736 }
737}
738\f
/* A logical switch port or logical router port, correlated with its
 * southbound Port_Binding row. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;          /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived;               /* Indicates whether this is an additional
                                 * port derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath to which this port belongs. */

    struct ovs_list list;       /* In list of similar records. */
};
775
776static struct ovn_port *
777ovn_port_create(struct hmap *ports, const char *key,
0ee00741
HK
778 const struct nbrec_logical_switch_port *nbsp,
779 const struct nbrec_logical_router_port *nbrp,
5868eb24
BP
780 const struct sbrec_port_binding *sb)
781{
782 struct ovn_port *op = xzalloc(sizeof *op);
9975d7be
BP
783
784 struct ds json_key = DS_EMPTY_INITIALIZER;
785 json_string_escape(key, &json_key);
786 op->json_key = ds_steal_cstr(&json_key);
787
788 op->key = xstrdup(key);
5868eb24 789 op->sb = sb;
0ee00741
HK
790 op->nbsp = nbsp;
791 op->nbrp = nbrp;
41a15b71 792 op->derived = false;
5868eb24
BP
793 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
794 return op;
795}
796
797static void
798ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
799{
800 if (port) {
801 /* Don't remove port->list. It is used within build_ports() as a
802 * private list and once we've exited that function it is not safe to
803 * use it. */
804 hmap_remove(ports, &port->key_node);
e93b43d6
JP
805
806 for (int i = 0; i < port->n_lsp_addrs; i++) {
807 destroy_lport_addresses(&port->lsp_addrs[i]);
808 }
809 free(port->lsp_addrs);
810
811 for (int i = 0; i < port->n_ps_addrs; i++) {
812 destroy_lport_addresses(&port->ps_addrs[i]);
813 }
814 free(port->ps_addrs);
815
4685e523 816 destroy_lport_addresses(&port->lrp_networks);
9975d7be
BP
817 free(port->json_key);
818 free(port->key);
5868eb24
BP
819 free(port);
820 }
821}
822
823static struct ovn_port *
824ovn_port_find(struct hmap *ports, const char *name)
825{
826 struct ovn_port *op;
827
828 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
829 if (!strcmp(op->key, name)) {
830 return op;
831 }
832 }
833 return NULL;
834}
835
836static uint32_t
837ovn_port_allocate_key(struct ovn_datapath *od)
838{
839 return allocate_tnlid(&od->port_tnlids, "port",
840 (1u << 15) - 1, &od->port_key_hint);
841}
842
41a15b71
MS
/* Returns the name of the chassis-redirect port derived from 'port_name',
 * i.e. "cr-<port_name>".  The caller must free the returned string. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
848
8639f9be
ND
849static bool
850ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
851{
852 struct macam_node *macam_node;
853 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
854 &macam) {
855 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
856 if (warn) {
857 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
858 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
859 ETH_ADDR_ARGS(macam_node->mac_addr));
860 }
861 return true;
862 }
863 }
864 return false;
865}
866
8639f9be
ND
867static void
868ipam_insert_mac(struct eth_addr *ea, bool check)
869{
870 if (!ea) {
871 return;
872 }
873
874 uint64_t mac64 = eth_addr_to_uint64(*ea);
875 /* If the new MAC was not assigned by this address management system or
876 * check is true and the new MAC is a duplicate, do not insert it into the
877 * macam hmap. */
878 if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
879 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
880 return;
881 }
882
883 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
884 new_macam_node->mac_addr = *ea;
885 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
886}
887
888static void
161ea2c8 889ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
8639f9be 890{
161ea2c8 891 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
8639f9be
ND
892 return;
893 }
894
161ea2c8
NS
895 if (ip >= od->ipam_info->start_ipv4 &&
896 ip < (od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s)) {
897 bitmap_set1(od->ipam_info->allocated_ipv4s,
898 ip - od->ipam_info->start_ipv4);
8639f9be 899 }
8639f9be
ND
900}
901
/* Records one switch-port address set 'address' into the address-management
 * structures: the MAC always goes into the global macam; the IPv4 addresses
 * go into 'od''s IPAM bitmap only when the switch has a subnet configured.
 * "unknown", "router", and dynamic address sets are skipped. */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!od->ipam_info || !od->ipam_info->allocated_ipv4s) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip);
    }

    destroy_lport_addresses(&laddrs);
}
933
/* Seeds the MACAM/IPAM structures with all addresses already assigned to
 * port 'op': for a switch port, its static and dynamic address sets; for a
 * router port, its MAC and -- when its peer switch runs IPAM -- its IPv4
 * networks, recorded against the peer's datapath. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Only record the router port's IPs against a peer switch that
         * actually runs IPAM (has other_config:subnet). */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
973
974static uint64_t
975ipam_get_unused_mac(void)
976{
977 /* Stores the suffix of the most recently ipam-allocated MAC address. */
978 static uint32_t last_mac;
979
980 uint64_t mac64;
981 struct eth_addr mac;
982 uint32_t mac_addr_suffix, i;
983 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
984 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
985 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
986 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
987 eth_addr_from_uint64(mac64, &mac);
988 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
989 last_mac = mac_addr_suffix;
990 break;
991 }
992 }
993
994 if (i == MAC_ADDR_SPACE) {
995 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
996 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
997 mac64 = 0;
998 }
999
1000 return mac64;
1001}
1002
1003static uint32_t
161ea2c8 1004ipam_get_unused_ip(struct ovn_datapath *od)
8639f9be 1005{
161ea2c8 1006 if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
8639f9be
ND
1007 return 0;
1008 }
1009
161ea2c8
NS
1010 size_t new_ip_index = bitmap_scan(od->ipam_info->allocated_ipv4s, 0, 0,
1011 od->ipam_info->total_ipv4s - 1);
1012 if (new_ip_index == od->ipam_info->total_ipv4s - 1) {
8639f9be
ND
1013 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1014 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
161ea2c8 1015 return 0;
8639f9be
ND
1016 }
1017
161ea2c8 1018 return od->ipam_info->start_ipv4 + new_ip_index;
8639f9be
ND
1019}
1020
1021static bool
1022ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
161ea2c8 1023 const char *addrspec)
8639f9be
ND
1024{
1025 if (!od || !op || !op->nbsp) {
1026 return false;
1027 }
1028
161ea2c8 1029 uint32_t ip = ipam_get_unused_ip(od);
8639f9be
ND
1030 if (!ip) {
1031 return false;
1032 }
1033
1034 struct eth_addr mac;
6374d518
LR
1035 bool check_mac;
1036 int n = 0;
1037
1038 if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
1039 ETH_ADDR_SCAN_ARGS(mac), &n)
1040 && addrspec[n] == '\0') {
1041 check_mac = true;
1042 } else {
1043 uint64_t mac64 = ipam_get_unused_mac();
1044 if (!mac64) {
1045 return false;
1046 }
1047 eth_addr_from_uint64(mac64, &mac);
1048 check_mac = false;
8639f9be 1049 }
8639f9be 1050
161ea2c8 1051 /* Add MAC to MACAM and IP to IPAM bitmap if both addresses were allocated
8639f9be 1052 * successfully. */
161ea2c8 1053 ipam_insert_ip(od, ip);
6374d518 1054 ipam_insert_mac(&mac, check_mac);
8639f9be
ND
1055
1056 char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT,
1057 ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip)));
1058 nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr);
1059 free(new_addr);
1060
1061 return true;
1062}
1063
/* Walks every logical switch with a configured subnet and allocates dynamic
 * MAC/IPv4 addresses for ports whose addresses column requests "dynamic". */
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Only switches (od->nbs) with an initialized IPAM bitmap take
         * part in dynamic allocation. */
        if (!od->nbs || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
            continue;
        }

        struct ovn_port *op;
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            const struct nbrec_logical_switch_port *nbsp =
                od->nbs->ports[i];

            if (!nbsp) {
                continue;
            }

            op = ovn_port_find(ports, nbsp->name);
            if (!op || (op->nbsp && op->peer)) {
                /* Do not allocate addresses for logical switch ports that
                 * have a peer. */
                continue;
            }

            for (size_t j = 0; j < nbsp->n_addresses; j++) {
                if (is_dynamic_lsp_address(nbsp->addresses[j])
                    && !nbsp->dynamic_addresses) {
                    /* NOTE(review): after a successful allocation,
                     * nbsp->dynamic_addresses is re-read here — this relies
                     * on the IDL reflecting the set_dynamic_addresses()
                     * write immediately in the local replica. */
                    if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
                        || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "Failed to allocate address.");
                    } else {
                        op->n_lsp_addrs++;
                    }
                    /* At most one dynamic entry per port is processed. */
                    break;
                }
            }

            /* If no addresses remain but a stale dynamic allocation
             * lingers, clear it. */
            if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
                nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                                NULL);
            }
        }
    }
}
1120\f
b511690b
GS
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node;    /* Node in the tag_alloc_table hmap,
                                    * hashed on 'parent_name'. */
    char *parent_name;             /* Parent port name (owned, malloc'd). */
    unsigned long *allocated_tags; /* A bitmap to track allocated tags. */
};
1130
1131static void
1132tag_alloc_destroy(struct hmap *tag_alloc_table)
1133{
1134 struct tag_alloc_node *node;
1135 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1136 bitmap_free(node->allocated_tags);
1137 free(node->parent_name);
1138 free(node);
1139 }
1140 hmap_destroy(tag_alloc_table);
1141}
1142
1143static struct tag_alloc_node *
1144tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1145{
1146 /* If a node for the 'parent_name' exists, return it. */
1147 struct tag_alloc_node *tag_alloc_node;
1148 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1149 hash_string(parent_name, 0),
1150 tag_alloc_table) {
1151 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1152 return tag_alloc_node;
1153 }
1154 }
1155
1156 /* Create a new node. */
1157 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1158 tag_alloc_node->parent_name = xstrdup(parent_name);
1159 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1160 /* Tag 0 is invalid for nested containers. */
1161 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1162 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1163 hash_string(parent_name, 0));
1164
1165 return tag_alloc_node;
1166}
1167
1168static void
1169tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1170 const struct nbrec_logical_switch_port *nbsp)
1171{
1172 /* Add the tags of already existing nested containers. If there is no
1173 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1174 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1175 return;
1176 }
1177
1178 struct tag_alloc_node *tag_alloc_node;
1179 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1180 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1181}
1182
1183static void
1184tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1185 const struct nbrec_logical_switch_port *nbsp)
1186{
1187 if (!nbsp->tag_request) {
1188 return;
1189 }
1190
1191 if (nbsp->parent_name && nbsp->parent_name[0]
1192 && *nbsp->tag_request == 0) {
1193 /* For nested containers that need allocation, do the allocation. */
1194
1195 if (nbsp->tag) {
1196 /* This has already been allocated. */
1197 return;
1198 }
1199
1200 struct tag_alloc_node *tag_alloc_node;
1201 int64_t tag;
1202 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1203 nbsp->parent_name);
1204 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1205 if (tag == MAX_OVN_TAGS) {
1206 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1207 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1208 "parent %s", nbsp->parent_name);
1209 return;
1210 }
1211 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1212 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1213 } else if (*nbsp->tag_request != 0) {
1214 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1215 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1216 }
1217}
1218\f
8639f9be 1219
6c4f7a8a
NS
1220/*
1221 * This function checks if the MAC in "address" parameter (if present) is
1222 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1223 * and updates it.
1224 */
1225static void
1226check_and_update_mac_in_dynamic_addresses(
1227 const char *address,
1228 const struct nbrec_logical_switch_port *nbsp)
1229{
1230 if (!nbsp->dynamic_addresses) {
1231 return;
1232 }
1233 int buf_index = 0;
1234 struct eth_addr ea;
1235 if (!ovs_scan_len(address, &buf_index,
1236 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
1237 return;
1238 }
1239
1240 struct eth_addr present_ea;
1241 buf_index = 0;
1242 if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index,
1243 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea))
1244 && !eth_addr_equals(ea, present_ea)) {
1245 /* MAC address has changed. Update it */
1246 char *new_addr = xasprintf(
1247 ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea),
1248 &nbsp->dynamic_addresses[buf_index]);
1249 nbrec_logical_switch_port_set_dynamic_addresses(
1250 nbsp, new_addr);
1251 free(new_addr);
1252 }
1253}
1254
5868eb24
BP
/* Correlates southbound Port_Binding rows with northbound logical switch and
 * router ports.  Populates 'ports' with one ovn_port per logical port and
 * partitions them into three lists: 'sb_only' (stale SB rows), 'nb_only'
 * (NB ports needing a new SB row), and 'both'.  Also parses each port's
 * addresses, seeds IPAM/MACAM and the tag allocator, creates derived
 * chassisredirect ports, and wires up switch/router peer pointers. */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start with every SB binding assumed stale; matches below move rows
     * from 'sb_only' to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Re-register any qdisc queue already recorded in the
                     * SB options so it is not reallocated. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse the port's addresses column. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    /* "router" entries are filled in later, once peers are
                     * known (see the loop at the end of this function). */
                    if (!strcmp(nbsp->addresses[j], "unknown")
                        || !strcmp(nbsp->addresses[j], "router")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            check_and_update_mac_in_dynamic_addresses(
                                nbsp->addresses[j], nbsp);
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            /* Not yet allocated; build_ipam() handles it. */
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the port_security column. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);

                const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                        "redirect-chassis");
                if (redirect_chassis) {
                    /* Additional "derived" ovn_port crp represents the
                     * instance of op on the "redirect-chassis". */
                    const char *gw_chassis = smap_get(&op->od->nbr->options,
                                                      "chassis");
                    if (gw_chassis) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: "
                                     "redirect-chassis configured on port %s "
                                     "on L3 gateway router", nbrp->name);
                        continue;
                    }
                    if (od->l3dgw_port || od->l3redirect_port) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
                                     "with redirect-chassis on same logical "
                                     "router %s", od->nbr->name);
                        continue;
                    }

                    char *redirect_name = chassis_redirect_name(nbrp->name);
                    struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                    if (crp) {
                        crp->derived = true;
                        crp->nbrp = nbrp;
                        ovs_list_remove(&crp->list);
                        ovs_list_push_back(both, &crp->list);
                    } else {
                        crp = ovn_port_create(ports, redirect_name,
                                              NULL, nbrp, NULL);
                        crp->derived = true;
                        ovs_list_push_back(nb_only, &crp->list);
                    }
                    crp->od = od;
                    free(redirect_name);

                    /* Set l3dgw_port and l3redirect_port in od, for later
                     * use during flow creation. */
                    od->l3dgw_port = op;
                    od->l3redirect_port = crp;
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;

            /* Fill op->lsp_addrs for op->nbsp->addresses[] with
             * contents "router", which was skipped in the loop above. */
            for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
                if (!strcmp(op->nbsp->addresses[j], "router")) {
                    if (extract_lrp_networks(peer->nbrp,
                                             &op->lsp_addrs[op->n_lsp_addrs])) {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }
        } else if (op->nbrp && op->nbrp->peer && !op->derived) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1512
e914fb54
MS
1513static void
1514ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1515 uint16_t *port);
1516
1517static void
1518get_router_load_balancer_ips(const struct ovn_datapath *od,
1519 struct sset *all_ips)
1520{
1521 if (!od->nbr) {
1522 return;
1523 }
1524
1525 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
1526 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
1527 struct smap *vips = &lb->vips;
1528 struct smap_node *node;
1529
1530 SMAP_FOR_EACH (node, vips) {
1531 /* node->key contains IP:port or just IP. */
1532 char *ip_address = NULL;
1533 uint16_t port;
1534
1535 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1536 if (!ip_address) {
1537 continue;
1538 }
1539
1540 if (!sset_contains(all_ips, ip_address)) {
1541 sset_add(all_ips, ip_address);
1542 }
1543
1544 free(ip_address);
1545 }
1546 }
1547}
1548
f40c5588
MS
1549/* Returns an array of strings, each consisting of a MAC address followed
1550 * by one or more IP addresses, and if the port is a distributed gateway
1551 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1552 * LPORT_NAME is the name of the L3 redirect port or the name of the
1553 * logical_port specified in a NAT rule. These strings include the
1554 * external IP addresses of all NAT rules defined on that router, and all
1555 * of the IP addresses used in load balancer VIPs defined on that router.
e914fb54 1556 *
f40c5588
MS
1557 * The caller must free each of the n returned strings with free(),
1558 * and must free the returned array when it is no longer needed. */
1559static char **
1560get_nat_addresses(const struct ovn_port *op, size_t *n)
e914fb54 1561{
f40c5588 1562 size_t n_nats = 0;
e914fb54
MS
1563 struct eth_addr mac;
1564 if (!op->nbrp || !op->od || !op->od->nbr
1565 || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer)
1566 || !eth_addr_from_string(op->nbrp->mac, &mac)) {
f40c5588 1567 *n = n_nats;
e914fb54
MS
1568 return NULL;
1569 }
1570
f40c5588
MS
1571 struct ds c_addresses = DS_EMPTY_INITIALIZER;
1572 ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1573 bool central_ip_address = false;
1574
1575 char **addresses;
1576 addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1));
e914fb54
MS
1577
1578 /* Get NAT IP addresses. */
f40c5588 1579 for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
e914fb54
MS
1580 const struct nbrec_nat *nat = op->od->nbr->nat[i];
1581 ovs_be32 ip, mask;
1582
1583 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
1584 if (error || mask != OVS_BE32_MAX) {
1585 free(error);
1586 continue;
1587 }
26b9e08d
MS
1588
1589 /* Determine whether this NAT rule satisfies the conditions for
1590 * distributed NAT processing. */
1591 if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat")
1592 && nat->logical_port && nat->external_mac) {
1593 /* Distributed NAT rule. */
f40c5588
MS
1594 if (eth_addr_from_string(nat->external_mac, &mac)) {
1595 struct ds address = DS_EMPTY_INITIALIZER;
1596 ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
1597 ds_put_format(&address, " %s", nat->external_ip);
1598 ds_put_format(&address, " is_chassis_resident(\"%s\")",
1599 nat->logical_port);
1600 addresses[n_nats++] = ds_steal_cstr(&address);
1601 }
26b9e08d
MS
1602 } else {
1603 /* Centralized NAT rule, either on gateway router or distributed
1604 * router. */
f40c5588
MS
1605 ds_put_format(&c_addresses, " %s", nat->external_ip);
1606 central_ip_address = true;
26b9e08d 1607 }
e914fb54
MS
1608 }
1609
1610 /* A set to hold all load-balancer vips. */
1611 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1612 get_router_load_balancer_ips(op->od, &all_ips);
1613
1614 const char *ip_address;
1615 SSET_FOR_EACH (ip_address, &all_ips) {
f40c5588
MS
1616 ds_put_format(&c_addresses, " %s", ip_address);
1617 central_ip_address = true;
e914fb54
MS
1618 }
1619 sset_destroy(&all_ips);
1620
f40c5588
MS
1621 if (central_ip_address) {
1622 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1623 * ports should be restricted to the "redirect-chassis". */
1624 if (op->od->l3redirect_port) {
1625 ds_put_format(&c_addresses, " is_chassis_resident(%s)",
1626 op->od->l3redirect_port->json_key);
1627 }
1628
1629 addresses[n_nats++] = ds_steal_cstr(&c_addresses);
26b9e08d
MS
1630 }
1631
f40c5588
MS
1632 *n = n_nats;
1633
1634 return addresses;
e914fb54
MS
1635}
1636
5868eb24 1637static void
a6095f81
BS
1638ovn_port_update_sbrec(const struct ovn_port *op,
1639 struct hmap *chassis_qdisc_queues)
5868eb24
BP
1640{
1641 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
0ee00741 1642 if (op->nbrp) {
c1645003 1643 /* If the router is for l3 gateway, it resides on a chassis
17bac0ff 1644 * and its port type is "l3gateway". */
c1645003 1645 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
41a15b71
MS
1646 if (op->derived) {
1647 sbrec_port_binding_set_type(op->sb, "chassisredirect");
1648 } else if (chassis) {
17bac0ff 1649 sbrec_port_binding_set_type(op->sb, "l3gateway");
c1645003
GS
1650 } else {
1651 sbrec_port_binding_set_type(op->sb, "patch");
1652 }
9975d7be 1653
c1645003
GS
1654 struct smap new;
1655 smap_init(&new);
41a15b71
MS
1656 if (op->derived) {
1657 const char *redirect_chassis = smap_get(&op->nbrp->options,
1658 "redirect-chassis");
1659 if (redirect_chassis) {
1660 smap_add(&new, "redirect-chassis", redirect_chassis);
1661 }
1662 smap_add(&new, "distributed-port", op->nbrp->name);
1663 } else {
1664 const char *peer = op->peer ? op->peer->key : "<error>";
1665 smap_add(&new, "peer", peer);
1666 if (chassis) {
1667 smap_add(&new, "l3gateway-chassis", chassis);
1668 }
c1645003
GS
1669 }
1670 sbrec_port_binding_set_options(op->sb, &new);
1671 smap_destroy(&new);
9975d7be
BP
1672
1673 sbrec_port_binding_set_parent_port(op->sb, NULL);
1674 sbrec_port_binding_set_tag(op->sb, NULL, 0);
1675 sbrec_port_binding_set_mac(op->sb, NULL, 0);
1676 } else {
0ee00741 1677 if (strcmp(op->nbsp->type, "router")) {
a6095f81
BS
1678 uint32_t queue_id = smap_get_int(
1679 &op->sb->options, "qdisc_queue_id", 0);
1680 bool has_qos = port_has_qos_params(&op->nbsp->options);
1681 struct smap options;
1682
1683 if (op->sb->chassis && has_qos && !queue_id) {
1684 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
1685 op->sb->chassis);
1686 } else if (!has_qos && queue_id) {
1687 free_chassis_queueid(chassis_qdisc_queues,
1688 op->sb->chassis,
1689 queue_id);
1690 queue_id = 0;
1691 }
1692
1693 smap_clone(&options, &op->nbsp->options);
1694 if (queue_id) {
1695 smap_add_format(&options,
1696 "qdisc_queue_id", "%d", queue_id);
1697 }
1698 sbrec_port_binding_set_options(op->sb, &options);
1699 smap_destroy(&options);
0ee00741 1700 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
9975d7be 1701 } else {
c1645003
GS
1702 const char *chassis = NULL;
1703 if (op->peer && op->peer->od && op->peer->od->nbr) {
1704 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
1705 }
1706
1707 /* A switch port connected to a gateway router is also of
17bac0ff 1708 * type "l3gateway". */
c1645003 1709 if (chassis) {
17bac0ff 1710 sbrec_port_binding_set_type(op->sb, "l3gateway");
c1645003
GS
1711 } else {
1712 sbrec_port_binding_set_type(op->sb, "patch");
1713 }
9975d7be 1714
f99f67bd
BP
1715 const char *router_port = smap_get_def(&op->nbsp->options,
1716 "router-port", "<error>");
c1645003
GS
1717 struct smap new;
1718 smap_init(&new);
1719 smap_add(&new, "peer", router_port);
1720 if (chassis) {
17bac0ff 1721 smap_add(&new, "l3gateway-chassis", chassis);
c1645003 1722 }
f40c5588
MS
1723 sbrec_port_binding_set_options(op->sb, &new);
1724 smap_destroy(&new);
8439c2eb
CSV
1725
1726 const char *nat_addresses = smap_get(&op->nbsp->options,
1727 "nat-addresses");
e914fb54 1728 if (nat_addresses && !strcmp(nat_addresses, "router")) {
26b9e08d
MS
1729 if (op->peer && op->peer->od
1730 && (chassis || op->peer->od->l3redirect_port)) {
f40c5588
MS
1731 size_t n_nats;
1732 char **nats = get_nat_addresses(op->peer, &n_nats);
1733 if (n_nats) {
1734 sbrec_port_binding_set_nat_addresses(op->sb,
1735 (const char **) nats, n_nats);
1736 for (size_t i = 0; i < n_nats; i++) {
1737 free(nats[i]);
1738 }
e914fb54 1739 free(nats);
f40c5588
MS
1740 } else {
1741 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
e914fb54 1742 }
f40c5588
MS
1743 } else {
1744 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
e914fb54 1745 }
26b9e08d
MS
1746 /* Only accept manual specification of ethernet address
1747 * followed by IPv4 addresses on type "l3gateway" ports. */
1748 } else if (nat_addresses && chassis) {
8439c2eb
CSV
1749 struct lport_addresses laddrs;
1750 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
1751 static struct vlog_rate_limit rl =
1752 VLOG_RATE_LIMIT_INIT(1, 1);
1753 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
f40c5588 1754 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
8439c2eb 1755 } else {
f40c5588
MS
1756 sbrec_port_binding_set_nat_addresses(op->sb,
1757 &nat_addresses, 1);
8439c2eb
CSV
1758 destroy_lport_addresses(&laddrs);
1759 }
f40c5588
MS
1760 } else {
1761 sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
8439c2eb 1762 }
9975d7be 1763 }
0ee00741
HK
1764 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
1765 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
1766 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
1767 op->nbsp->n_addresses);
9975d7be 1768 }
5868eb24
BP
1769}
1770
6e31816f
CSV
1771/* Remove mac_binding entries that refer to logical_ports which are
1772 * deleted. */
1773static void
1774cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
1775{
1776 const struct sbrec_mac_binding *b, *n;
1777 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
1778 if (!ovn_port_find(ports, b->logical_port)) {
1779 sbrec_mac_binding_delete(b);
1780 }
1781 }
1782}
1783
0bac7164 1784/* Updates the southbound Port_Binding table so that it contains the logical
80f408f4 1785 * switch ports specified by the northbound database.
0bac7164
BP
1786 *
1787 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
1788 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
1789 * datapaths. */
5868eb24
BP
1790static void
1791build_ports(struct northd_context *ctx, struct hmap *datapaths,
1792 struct hmap *ports)
1793{
1794 struct ovs_list sb_only, nb_only, both;
a6095f81
BS
1795 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
1796 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
5868eb24 1797
a6095f81
BS
1798 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
1799 &tag_alloc_table, &sb_only, &nb_only, &both);
5868eb24 1800
5868eb24 1801 struct ovn_port *op, *next;
b511690b
GS
1802 /* For logical ports that are in both databases, update the southbound
1803 * record based on northbound data. Also index the in-use tunnel_keys.
1804 * For logical ports that are in NB database, do any tag allocation
1805 * needed. */
5868eb24 1806 LIST_FOR_EACH_SAFE (op, next, list, &both) {
b511690b
GS
1807 if (op->nbsp) {
1808 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
1809 }
a6095f81 1810 ovn_port_update_sbrec(op, &chassis_qdisc_queues);
5868eb24
BP
1811
1812 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
1813 if (op->sb->tunnel_key > op->od->port_key_hint) {
1814 op->od->port_key_hint = op->sb->tunnel_key;
1815 }
1816 }
1817
1818 /* Add southbound record for each unmatched northbound record. */
1819 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
1820 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
1821 if (!tunnel_key) {
1822 continue;
1823 }
1824
1825 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
a6095f81 1826 ovn_port_update_sbrec(op, &chassis_qdisc_queues);
5868eb24
BP
1827
1828 sbrec_port_binding_set_logical_port(op->sb, op->key);
1829 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
1830 }
1831
6e31816f
CSV
1832 bool remove_mac_bindings = false;
1833 if (!ovs_list_is_empty(&sb_only)) {
1834 remove_mac_bindings = true;
1835 }
1836
5868eb24
BP
1837 /* Delete southbound records without northbound matches. */
1838 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 1839 ovs_list_remove(&op->list);
5868eb24
BP
1840 sbrec_port_binding_delete(op->sb);
1841 ovn_port_destroy(ports, op);
1842 }
6e31816f
CSV
1843 if (remove_mac_bindings) {
1844 cleanup_mac_bindings(ctx, ports);
1845 }
b511690b
GS
1846
1847 tag_alloc_destroy(&tag_alloc_table);
a6095f81 1848 destroy_chassis_queues(&chassis_qdisc_queues);
5868eb24
BP
1849}
1850\f
/* Southbound multicast group tunnel keys live in this reserved range. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named logical multicast group together with its tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Well-known groups.  (Semantics are defined where these are consumed;
 * presumably _MC_flood reaches all ports and _MC_unknown receives
 * unknown-destination traffic — confirm at the usage sites.) */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
1864
1865static bool
1866multicast_group_equal(const struct multicast_group *a,
1867 const struct multicast_group *b)
1868{
1869 return !strcmp(a->name, b->name) && a->key == b->key;
1870}
1871
1872/* Multicast group entry. */
1873struct ovn_multicast {
1874 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
1875 struct ovn_datapath *datapath;
1876 const struct multicast_group *group;
1877
1878 struct ovn_port **ports;
1879 size_t n_ports, allocated_ports;
1880};
1881
1882static uint32_t
1883ovn_multicast_hash(const struct ovn_datapath *datapath,
1884 const struct multicast_group *group)
1885{
1886 return hash_pointer(datapath, group->key);
1887}
1888
1889static struct ovn_multicast *
1890ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
1891 const struct multicast_group *group)
1892{
1893 struct ovn_multicast *mc;
1894
1895 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
1896 ovn_multicast_hash(datapath, group), mcgroups) {
1897 if (mc->datapath == datapath
1898 && multicast_group_equal(mc->group, group)) {
1899 return mc;
4edcdcf4
RB
1900 }
1901 }
5868eb24
BP
1902 return NULL;
1903}
1904
1905static void
1906ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
1907 struct ovn_port *port)
1908{
1909 struct ovn_datapath *od = port->od;
1910 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
1911 if (!mc) {
1912 mc = xmalloc(sizeof *mc);
1913 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
1914 mc->datapath = od;
1915 mc->group = group;
1916 mc->n_ports = 0;
1917 mc->allocated_ports = 4;
1918 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
1919 }
1920 if (mc->n_ports >= mc->allocated_ports) {
1921 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
1922 sizeof *mc->ports);
1923 }
1924 mc->ports[mc->n_ports++] = port;
1925}
4edcdcf4 1926
5868eb24
BP
1927static void
1928ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
1929{
1930 if (mc) {
1931 hmap_remove(mcgroups, &mc->hmap_node);
1932 free(mc->ports);
1933 free(mc);
1934 }
1935}
4edcdcf4 1936
5868eb24
BP
1937static void
1938ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
1939 const struct sbrec_multicast_group *sb)
1940{
1941 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
1942 for (size_t i = 0; i < mc->n_ports; i++) {
1943 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
1944 }
1945 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
1946 free(ports);
4edcdcf4 1947}
bd39395f 1948\f
48605550 1949/* Logical flow generation.
bd39395f 1950 *
48605550 1951 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
1952 * function of most of the northbound database.
1953 */
1954
5868eb24
BP
1955struct ovn_lflow {
1956 struct hmap_node hmap_node;
bd39395f 1957
5868eb24 1958 struct ovn_datapath *od;
880fcd14 1959 enum ovn_stage stage;
5868eb24
BP
1960 uint16_t priority;
1961 char *match;
1962 char *actions;
d8026bbf 1963 const char *where;
bd39395f
BP
1964};
1965
1966static size_t
5868eb24 1967ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 1968{
5868eb24 1969 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 1970 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
1971 hash = hash_string(lflow->match, hash);
1972 return hash_string(lflow->actions, hash);
bd39395f
BP
1973}
1974
5868eb24
BP
1975static bool
1976ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1977{
1978 return (a->od == b->od
880fcd14 1979 && a->stage == b->stage
5868eb24
BP
1980 && a->priority == b->priority
1981 && !strcmp(a->match, b->match)
1982 && !strcmp(a->actions, b->actions));
1983}
1984
1985static void
1986ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
d8026bbf
BP
1987 enum ovn_stage stage, uint16_t priority,
1988 char *match, char *actions, const char *where)
bd39395f 1989{
5868eb24 1990 lflow->od = od;
880fcd14 1991 lflow->stage = stage;
5868eb24
BP
1992 lflow->priority = priority;
1993 lflow->match = match;
1994 lflow->actions = actions;
d8026bbf 1995 lflow->where = where;
bd39395f
BP
1996}
1997
48605550 1998/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 1999static void
d8026bbf
BP
2000ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
2001 enum ovn_stage stage, uint16_t priority,
2002 const char *match, const char *actions, const char *where)
5868eb24 2003{
9a9961d2
BP
2004 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
2005
5868eb24 2006 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 2007 ovn_lflow_init(lflow, od, stage, priority,
d8026bbf 2008 xstrdup(match), xstrdup(actions), where);
5868eb24
BP
2009 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
2010}
2011
d8026bbf
BP
2012/* Adds a row with the specified contents to the Logical_Flow table. */
2013#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
2014 ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
2015 OVS_SOURCE_LOCATOR)
2016
5868eb24
BP
2017static struct ovn_lflow *
2018ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 2019 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
2020 const char *match, const char *actions)
2021{
2022 struct ovn_lflow target;
880fcd14 2023 ovn_lflow_init(&target, od, stage, priority,
d8026bbf
BP
2024 CONST_CAST(char *, match), CONST_CAST(char *, actions),
2025 NULL);
5868eb24
BP
2026
2027 struct ovn_lflow *lflow;
2028 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
2029 lflows) {
2030 if (ovn_lflow_equal(lflow, &target)) {
2031 return lflow;
bd39395f
BP
2032 }
2033 }
5868eb24
BP
2034 return NULL;
2035}
bd39395f 2036
5868eb24
BP
2037static void
2038ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
2039{
2040 if (lflow) {
2041 hmap_remove(lflows, &lflow->hmap_node);
2042 free(lflow->match);
2043 free(lflow->actions);
2044 free(lflow);
2045 }
bd39395f
BP
2046}
2047
bd39395f 2048/* Appends port security constraints on L2 address field 'eth_addr_field'
e93b43d6
JP
2049 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2050 * elements, is the collection of port_security constraints from an
2051 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
bd39395f 2052static void
685f4dfe 2053build_port_security_l2(const char *eth_addr_field,
e93b43d6
JP
2054 struct lport_addresses *ps_addrs,
2055 unsigned int n_ps_addrs,
685f4dfe 2056 struct ds *match)
bd39395f 2057{
e93b43d6
JP
2058 if (!n_ps_addrs) {
2059 return;
2060 }
bd39395f 2061
e93b43d6 2062 ds_put_format(match, " && %s == {", eth_addr_field);
f7cb14cd 2063
e93b43d6
JP
2064 for (size_t i = 0; i < n_ps_addrs; i++) {
2065 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
bd39395f 2066 }
f7cb14cd 2067 ds_chomp(match, ' ');
bd39395f 2068 ds_put_cstr(match, "}");
bd39395f
BP
2069}
2070
685f4dfe
NS
2071static void
2072build_port_security_ipv6_nd_flow(
2073 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
2074 int n_ipv6_addrs)
2075{
2076 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
2077 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
2078 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
2079 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
2080 ETH_ADDR_ARGS(ea));
2081 if (!n_ipv6_addrs) {
2082 ds_put_cstr(match, "))");
2083 return;
2084 }
2085
2086 char ip6_str[INET6_ADDRSTRLEN + 1];
2087 struct in6_addr lla;
2088 in6_generate_lla(ea, &lla);
2089 memset(ip6_str, 0, sizeof(ip6_str));
2090 ipv6_string_mapped(ip6_str, &lla);
2091 ds_put_format(match, " && (nd.target == %s", ip6_str);
2092
2093 for(int i = 0; i < n_ipv6_addrs; i++) {
2094 memset(ip6_str, 0, sizeof(ip6_str));
2095 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
2096 ds_put_format(match, " || nd.target == %s", ip6_str);
2097 }
2098
2099 ds_put_format(match, ")))");
2100}
2101
2102static void
2103build_port_security_ipv6_flow(
2104 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
2105 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
2106{
2107 char ip6_str[INET6_ADDRSTRLEN + 1];
2108
2109 ds_put_format(match, " && %s == {",
2110 pipeline == P_IN ? "ip6.src" : "ip6.dst");
2111
2112 /* Allow link-local address. */
2113 struct in6_addr lla;
2114 in6_generate_lla(ea, &lla);
2115 ipv6_string_mapped(ip6_str, &lla);
2116 ds_put_format(match, "%s, ", ip6_str);
2117
9e687b23
DL
2118 /* Allow ip6.dst=ff00::/8 for multicast packets */
2119 if (pipeline == P_OUT) {
2120 ds_put_cstr(match, "ff00::/8, ");
2121 }
685f4dfe
NS
2122 for(int i = 0; i < n_ipv6_addrs; i++) {
2123 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
9e687b23 2124 ds_put_format(match, "%s, ", ip6_str);
685f4dfe 2125 }
9e687b23
DL
2126 /* Replace ", " by "}". */
2127 ds_chomp(match, ' ');
2128 ds_chomp(match, ',');
685f4dfe
NS
2129 ds_put_cstr(match, "}");
2130}
2131
2132/**
2133 * Build port security constraints on ARP and IPv6 ND fields
2134 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2135 *
2136 * For each port security of the logical port, following
2137 * logical flows are added
2138 * - If the port security has no IP (both IPv4 and IPv6) or
2139 * if it has IPv4 address(es)
2140 * - Priority 90 flow to allow ARP packets for known MAC addresses
2141 * in the eth.src and arp.spa fields. If the port security
2142 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2143 *
2144 * - If the port security has no IP (both IPv4 and IPv6) or
2145 * if it has IPv6 address(es)
2146 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2147 * in the eth.src and nd.sll/nd.tll fields. If the port security
2148 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2149 * for IPv6 Neighbor Advertisement packet.
2150 *
2151 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2152 */
2153static void
2154build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
2155{
e93b43d6
JP
2156 struct ds match = DS_EMPTY_INITIALIZER;
2157
2158 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2159 struct lport_addresses *ps = &op->ps_addrs[i];
685f4dfe 2160
e93b43d6 2161 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
685f4dfe 2162
e93b43d6
JP
2163 ds_clear(&match);
2164 if (ps->n_ipv4_addrs || no_ip) {
2165 ds_put_format(&match,
2166 "inport == %s && eth.src == %s && arp.sha == %s",
2167 op->json_key, ps->ea_s, ps->ea_s);
685f4dfe 2168
e93b43d6
JP
2169 if (ps->n_ipv4_addrs) {
2170 ds_put_cstr(&match, " && arp.spa == {");
f95523c0 2171 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
7d9d86ad
NS
2172 /* When the netmask is applied, if the host portion is
2173 * non-zero, the host can only use the specified
2174 * address in the arp.spa. If zero, the host is allowed
2175 * to use any address in the subnet. */
f95523c0
JP
2176 if (ps->ipv4_addrs[j].plen == 32
2177 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
2178 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
7d9d86ad 2179 } else {
e93b43d6 2180 ds_put_format(&match, "%s/%d",
f95523c0
JP
2181 ps->ipv4_addrs[j].network_s,
2182 ps->ipv4_addrs[j].plen);
7d9d86ad 2183 }
e93b43d6 2184 ds_put_cstr(&match, ", ");
685f4dfe
NS
2185 }
2186 ds_chomp(&match, ' ');
e93b43d6
JP
2187 ds_chomp(&match, ',');
2188 ds_put_cstr(&match, "}");
685f4dfe
NS
2189 }
2190 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2191 ds_cstr(&match), "next;");
685f4dfe
NS
2192 }
2193
e93b43d6
JP
2194 if (ps->n_ipv6_addrs || no_ip) {
2195 ds_clear(&match);
2196 ds_put_format(&match, "inport == %s && eth.src == %s",
2197 op->json_key, ps->ea_s);
2198 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
2199 ps->n_ipv6_addrs);
685f4dfe
NS
2200 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
2201 ds_cstr(&match), "next;");
685f4dfe 2202 }
685f4dfe
NS
2203 }
2204
e93b43d6
JP
2205 ds_clear(&match);
2206 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
685f4dfe 2207 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
e93b43d6
JP
2208 ds_cstr(&match), "drop;");
2209 ds_destroy(&match);
685f4dfe
NS
2210}
2211
2212/**
2213 * Build port security constraints on IPv4 and IPv6 src and dst fields
2214 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2215 *
2216 * For each port security of the logical port, following
2217 * logical flows are added
2218 * - If the port security has IPv4 addresses,
2219 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2220 *
2221 * - If the port security has IPv6 addresses,
2222 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2223 *
2224 * - If the port security has IPv4 addresses or IPv6 addresses or both
2225 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2226 */
2227static void
2228build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
2229 struct hmap *lflows)
2230{
2231 char *port_direction;
2232 enum ovn_stage stage;
2233 if (pipeline == P_IN) {
2234 port_direction = "inport";
2235 stage = S_SWITCH_IN_PORT_SEC_IP;
2236 } else {
2237 port_direction = "outport";
2238 stage = S_SWITCH_OUT_PORT_SEC_IP;
2239 }
2240
e93b43d6
JP
2241 for (size_t i = 0; i < op->n_ps_addrs; i++) {
2242 struct lport_addresses *ps = &op->ps_addrs[i];
685f4dfe 2243
e93b43d6 2244 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
685f4dfe
NS
2245 continue;
2246 }
2247
e93b43d6 2248 if (ps->n_ipv4_addrs) {
685f4dfe
NS
2249 struct ds match = DS_EMPTY_INITIALIZER;
2250 if (pipeline == P_IN) {
9e687b23
DL
2251 /* Permit use of the unspecified address for DHCP discovery */
2252 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
2253 ds_put_format(&dhcp_match, "inport == %s"
e93b43d6 2254 " && eth.src == %s"
9e687b23
DL
2255 " && ip4.src == 0.0.0.0"
2256 " && ip4.dst == 255.255.255.255"
e93b43d6
JP
2257 " && udp.src == 68 && udp.dst == 67",
2258 op->json_key, ps->ea_s);
9e687b23
DL
2259 ovn_lflow_add(lflows, op->od, stage, 90,
2260 ds_cstr(&dhcp_match), "next;");
2261 ds_destroy(&dhcp_match);
e93b43d6 2262 ds_put_format(&match, "inport == %s && eth.src == %s"
9e687b23 2263 " && ip4.src == {", op->json_key,
e93b43d6 2264 ps->ea_s);
685f4dfe 2265 } else {
e93b43d6 2266 ds_put_format(&match, "outport == %s && eth.dst == %s"
685f4dfe 2267 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
e93b43d6 2268 op->json_key, ps->ea_s);
685f4dfe
NS
2269 }
2270
f95523c0
JP
2271 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
2272 ovs_be32 mask = ps->ipv4_addrs[j].mask;
7d9d86ad
NS
2273 /* When the netmask is applied, if the host portion is
2274 * non-zero, the host can only use the specified
2275 * address. If zero, the host is allowed to use any
2276 * address in the subnet.
e93b43d6 2277 */
f95523c0
JP
2278 if (ps->ipv4_addrs[j].plen == 32
2279 || ps->ipv4_addrs[j].addr & ~mask) {
2280 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
2281 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
e93b43d6
JP
2282 /* Host is also allowed to receive packets to the
2283 * broadcast address in the specified subnet. */
2284 ds_put_format(&match, ", %s",
f95523c0 2285 ps->ipv4_addrs[j].bcast_s);
7d9d86ad
NS
2286 }
2287 } else {
2288 /* host portion is zero */
f95523c0
JP
2289 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
2290 ps->ipv4_addrs[j].plen);
7d9d86ad
NS
2291 }
2292 ds_put_cstr(&match, ", ");
685f4dfe
NS
2293 }
2294
2295 /* Replace ", " by "}". */
2296 ds_chomp(&match, ' ');
2297 ds_chomp(&match, ',');
2298 ds_put_cstr(&match, "}");
2299 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
2300 ds_destroy(&match);
685f4dfe
NS
2301 }
2302
e93b43d6 2303 if (ps->n_ipv6_addrs) {
685f4dfe 2304 struct ds match = DS_EMPTY_INITIALIZER;
9e687b23
DL
2305 if (pipeline == P_IN) {
2306 /* Permit use of unspecified address for duplicate address
2307 * detection */
2308 struct ds dad_match = DS_EMPTY_INITIALIZER;
2309 ds_put_format(&dad_match, "inport == %s"
e93b43d6 2310 " && eth.src == %s"
9e687b23
DL
2311 " && ip6.src == ::"
2312 " && ip6.dst == ff02::/16"
2313 " && icmp6.type == {131, 135, 143}", op->json_key,
e93b43d6 2314 ps->ea_s);
9e687b23
DL
2315 ovn_lflow_add(lflows, op->od, stage, 90,
2316 ds_cstr(&dad_match), "next;");
2317 ds_destroy(&dad_match);
2318 }
e93b43d6 2319 ds_put_format(&match, "%s == %s && %s == %s",
685f4dfe 2320 port_direction, op->json_key,
e93b43d6
JP
2321 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
2322 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
2323 ps->ipv6_addrs, ps->n_ipv6_addrs);
685f4dfe
NS
2324 ovn_lflow_add(lflows, op->od, stage, 90,
2325 ds_cstr(&match), "next;");
2326 ds_destroy(&match);
685f4dfe
NS
2327 }
2328
e93b43d6
JP
2329 char *match = xasprintf("%s == %s && %s == %s && ip",
2330 port_direction, op->json_key,
2331 pipeline == P_IN ? "eth.src" : "eth.dst",
2332 ps->ea_s);
685f4dfe
NS
2333 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2334 free(match);
2335 }
f2a715b5 2336
685f4dfe
NS
2337}
2338
95a9a275 2339static bool
80f408f4 2340lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 2341{
80f408f4 2342 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
2343}
2344
4c7bf534 2345static bool
80f408f4 2346lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 2347{
80f408f4 2348 return !lsp->up || *lsp->up;
4c7bf534
NS
2349}
2350
281977f7
NS
2351static bool
2352build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
213615b3
NS
2353 struct ds *options_action, struct ds *response_action,
2354 struct ds *ipv4_addr_match)
281977f7
NS
2355{
2356 if (!op->nbsp->dhcpv4_options) {
2357 /* CMS has disabled native DHCPv4 for this lport. */
2358 return false;
2359 }
2360
2361 ovs_be32 host_ip, mask;
2362 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2363 &mask);
2364 if (error || ((offer_ip ^ host_ip) & mask)) {
2365 /* Either
2366 * - cidr defined is invalid or
2367 * - the offer ip of the logical port doesn't belong to the cidr
2368 * defined in the DHCPv4 options.
2369 * */
2370 free(error);
2371 return false;
2372 }
2373
2374 const char *server_ip = smap_get(
2375 &op->nbsp->dhcpv4_options->options, "server_id");
2376 const char *server_mac = smap_get(
2377 &op->nbsp->dhcpv4_options->options, "server_mac");
2378 const char *lease_time = smap_get(
2379 &op->nbsp->dhcpv4_options->options, "lease_time");
281977f7 2380
b89d25e5
GL
2381 if (!(server_ip && server_mac && lease_time)) {
2382 /* "server_id", "server_mac" and "lease_time" should be
281977f7
NS
2383 * present in the dhcp_options. */
2384 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2385 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2386 op->json_key);
2387 return false;
2388 }
2389
2390 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2391 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2392
2393 /* server_mac is not DHCPv4 option, delete it from the smap. */
2394 smap_remove(&dhcpv4_options, "server_mac");
2395 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2396 smap_add(&dhcpv4_options, "netmask", netmask);
2397 free(netmask);
2398
2399 ds_put_format(options_action,
2400 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2401 IP_FMT", ", IP_ARGS(offer_ip));
7c76bf4e
DDP
2402
2403 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2404 * options on different architectures (big or little endian, SSE4.2) */
2405 const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
2406 for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
2407 const struct smap_node *node = sorted_opts[i];
281977f7
NS
2408 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2409 }
7c76bf4e 2410 free(sorted_opts);
281977f7
NS
2411
2412 ds_chomp(options_action, ' ');
2413 ds_chomp(options_action, ',');
2414 ds_put_cstr(options_action, "); next;");
2415
2416 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2417 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
bf143492
JP
2418 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2419 "output;",
281977f7
NS
2420 server_mac, IP_ARGS(offer_ip), server_ip);
2421
213615b3
NS
2422 ds_put_format(ipv4_addr_match,
2423 "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
2424 IP_ARGS(offer_ip), server_ip);
281977f7
NS
2425 smap_destroy(&dhcpv4_options);
2426 return true;
2427}
2428
33ac3c83
NS
2429static bool
2430build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2431 struct ds *options_action, struct ds *response_action)
2432{
2433 if (!op->nbsp->dhcpv6_options) {
2434 /* CMS has disabled native DHCPv6 for this lport. */
2435 return false;
2436 }
2437
2438 struct in6_addr host_ip, mask;
2439
2440 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2441 &mask);
2442 if (error) {
2443 free(error);
2444 return false;
2445 }
2446 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2447 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2448 if (!ipv6_mask_is_any(&ip6_mask)) {
2449 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2450 * options.*/
2451 return false;
2452 }
2453
7c76bf4e 2454 const struct smap *options_map = &op->nbsp->dhcpv6_options->options;
33ac3c83 2455 /* "server_id" should be the MAC address. */
7c76bf4e 2456 const char *server_mac = smap_get(options_map, "server_id");
33ac3c83
NS
2457 struct eth_addr ea;
2458 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2459 /* "server_id" should be present in the dhcpv6_options. */
2460 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2461 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2462 " for lport %s", op->json_key);
2463 return false;
2464 }
2465
2466 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2467 struct in6_addr lla;
2468 in6_generate_lla(ea, &lla);
2469
2470 char server_ip[INET6_ADDRSTRLEN + 1];
2471 ipv6_string_mapped(server_ip, &lla);
2472
2473 char ia_addr[INET6_ADDRSTRLEN + 1];
2474 ipv6_string_mapped(ia_addr, offer_ip);
2475
2476 ds_put_format(options_action,
40df4566
ZKL
2477 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2478
2479 /* Check whether the dhcpv6 options should be configured as stateful.
2480 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
7c76bf4e 2481 if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) {
40df4566
ZKL
2482 char ia_addr[INET6_ADDRSTRLEN + 1];
2483 ipv6_string_mapped(ia_addr, offer_ip);
2484
2485 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2486 }
2487
7c76bf4e
DDP
2488 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2489 * options on different architectures (big or little endian, SSE4.2) */
2490 const struct smap_node **sorted_opts = smap_sort(options_map);
2491 for (size_t i = 0; i < smap_count(options_map); i++) {
2492 const struct smap_node *node = sorted_opts[i];
40df4566
ZKL
2493 if (strcmp(node->key, "dhcpv6_stateless")) {
2494 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2495 }
33ac3c83 2496 }
7c76bf4e
DDP
2497 free(sorted_opts);
2498
33ac3c83
NS
2499 ds_chomp(options_action, ' ');
2500 ds_chomp(options_action, ',');
2501 ds_put_cstr(options_action, "); next;");
2502
2503 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2504 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2505 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2506 "output;",
2507 server_mac, server_ip);
40df4566 2508
33ac3c83
NS
2509 return true;
2510}
2511
78aab811
JP
2512static bool
2513has_stateful_acl(struct ovn_datapath *od)
2514{
9975d7be
BP
2515 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2516 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
2517 if (!strcmp(acl->action, "allow-related")) {
2518 return true;
2519 }
2520 }
2521
2522 return false;
2523}
2524
2525static void
9ab989b7 2526build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
78aab811
JP
2527{
2528 bool has_stateful = has_stateful_acl(od);
2529
2530 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2531 * allowed by default. */
880fcd14
BP
2532 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
2533 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
78aab811 2534
c132fca0 2535 /* If there are any stateful ACL rules in this datapath, we must
78aab811
JP
2536 * send all IP packets through the conntrack action, which handles
2537 * defragmentation, in order to match L4 headers. */
2538 if (has_stateful) {
9ab989b7
BP
2539 for (size_t i = 0; i < od->n_router_ports; i++) {
2540 struct ovn_port *op = od->router_ports[i];
2541 /* Can't use ct() for router ports. Consider the
2542 * following configuration: lp1(10.0.0.2) on
2543 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2544 * ping from lp1 to lp2, First, the response will go
2545 * through ct() with a zone for lp2 in the ls2 ingress
2546 * pipeline on hostB. That ct zone knows about this
2547 * connection. Next, it goes through ct() with the zone
2548 * for the router port in the egress pipeline of ls2 on
2549 * hostB. This zone does not know about the connection,
2550 * as the icmp request went through the logical router
2551 * on hostA, not hostB. This would only work with
2552 * distributed conntrack state across all chassis. */
2553 struct ds match_in = DS_EMPTY_INITIALIZER;
2554 struct ds match_out = DS_EMPTY_INITIALIZER;
2555
2556 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
2557 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
2558 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2559 ds_cstr(&match_in), "next;");
2560 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2561 ds_cstr(&match_out), "next;");
2562
2563 ds_destroy(&match_in);
2564 ds_destroy(&match_out);
48fcdb47 2565 }
2d018f9b
GS
2566 /* Ingress and Egress Pre-ACL Table (Priority 110).
2567 *
2568 * Not to do conntrack on ND packets. */
2569 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
2570 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
48fcdb47 2571
78aab811
JP
2572 /* Ingress and Egress Pre-ACL Table (Priority 100).
2573 *
2574 * Regardless of whether the ACL is "from-lport" or "to-lport",
2575 * we need rules in both the ingress and egress table, because
facf8652
GS
2576 * the return traffic needs to be followed.
2577 *
2578 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2579 * it to conntrack for tracking and defragmentation. */
2580 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
2581 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2582 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
2583 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2d018f9b
GS
2584 }
2585}
78aab811 2586
7a15be69
GS
2587/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2588 * 'ip_address'. The caller must free() the memory allocated for
2589 * 'ip_address'. */
2590static void
2591ip_address_and_port_from_lb_key(const char *key, char **ip_address,
2592 uint16_t *port)
2593{
2594 char *ip_str, *start, *next;
2595 *ip_address = NULL;
2596 *port = 0;
2597
2598 next = start = xstrdup(key);
2599 ip_str = strsep(&next, ":");
2600 if (!ip_str || !ip_str[0]) {
2601 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2602 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2603 free(start);
2604 return;
2605 }
2606
2607 ovs_be32 ip, mask;
2608 char *error = ip_parse_masked(ip_str, &ip, &mask);
2609 if (error || mask != OVS_BE32_MAX) {
2610 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2611 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2612 free(start);
2613 free(error);
2614 return;
2615 }
2616
2617 int l4_port = 0;
2618 if (next && next[0]) {
2619 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
2620 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2621 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
2622 free(start);
2623 return;
2624 }
2625 }
2626
2627 *port = l4_port;
2628 *ip_address = strdup(ip_str);
2629 free(start);
2630}
2631
2632static void
2633build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2634{
2635 /* Allow all packets to go to next tables by default. */
2636 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2637 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2638
2639 struct sset all_ips = SSET_INITIALIZER(&all_ips);
61591ad9
GS
2640 bool vip_configured = false;
2641 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2642 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
2643 struct smap *vips = &lb->vips;
2644 struct smap_node *node;
7a15be69
GS
2645
2646 SMAP_FOR_EACH (node, vips) {
2647 vip_configured = true;
2648
2649 /* node->key contains IP:port or just IP. */
2650 char *ip_address = NULL;
2651 uint16_t port;
2652 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2653 if (!ip_address) {
2654 continue;
2655 }
2656
2657 if (!sset_contains(&all_ips, ip_address)) {
2658 sset_add(&all_ips, ip_address);
2659 }
2660
2661 free(ip_address);
2662
2663 /* Ignore L4 port information in the key because fragmented packets
2664 * may not have L4 information. The pre-stateful table will send
2665 * the packet through ct() action to de-fragment. In stateful
2666 * table, we will eventually look at L4 information. */
2667 }
61591ad9 2668 }
7a15be69 2669
61591ad9
GS
2670 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2671 * packet to conntrack for defragmentation. */
2672 const char *ip_address;
2673 SSET_FOR_EACH(ip_address, &all_ips) {
2674 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
2675 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
2676 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2677 free(match);
2678 }
7a15be69 2679
61591ad9 2680 sset_destroy(&all_ips);
7a15be69 2681
61591ad9
GS
2682 if (vip_configured) {
2683 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
2684 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
7a15be69
GS
2685 }
2686}
2687
facf8652
GS
2688static void
2689build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
2690{
2691 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
2692 * allowed by default. */
2693 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
2694 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
2695
2696 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
2697 * sent to conntrack for tracking and defragmentation. */
2698 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
2699 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2700 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
2701 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2702}
2703
2d018f9b
GS
2704static void
2705build_acls(struct ovn_datapath *od, struct hmap *lflows)
2706{
2707 bool has_stateful = has_stateful_acl(od);
e75451fe 2708
2d018f9b
GS
2709 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
2710 * default. A related rule at priority 1 is added below if there
2711 * are any stateful ACLs in this datapath. */
2712 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
2713 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
2714
2715 if (has_stateful) {
78aab811
JP
2716 /* Ingress and Egress ACL Table (Priority 1).
2717 *
2718 * By default, traffic is allowed. This is partially handled by
2719 * the Priority 0 ACL flows added earlier, but we also need to
2720 * commit IP flows. This is because, while the initiater's
2721 * direction may not have any stateful rules, the server's may
2722 * and then its return traffic would not have an associated
cc58e1f2
RB
2723 * conntrack entry and would return "+invalid".
2724 *
2725 * We use "ct_commit" for a connection that is not already known
2726 * by the connection tracker. Once a connection is committed,
2727 * subsequent packets will hit the flow at priority 0 that just
2728 * uses "next;"
2729 *
b73db61d 2730 * We also check for established connections that have ct_label.blocked
cc58e1f2
RB
2731 * set on them. That's a connection that was disallowed, but is
2732 * now allowed by policy again since it hit this default-allow flow.
b73db61d 2733 * We need to set ct_label.blocked=0 to let the connection continue,
cc58e1f2
RB
2734 * which will be done by ct_commit() in the "stateful" stage.
2735 * Subsequent packets will hit the flow at priority 0 that just
2736 * uses "next;". */
2737 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
b73db61d 2738 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
cc58e1f2
RB
2739 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2740 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
b73db61d 2741 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
cc58e1f2 2742 REGBIT_CONNTRACK_COMMIT" = 1; next;");
78aab811
JP
2743
2744 /* Ingress and Egress ACL Table (Priority 65535).
2745 *
cc58e1f2
RB
2746 * Always drop traffic that's in an invalid state. Also drop
2747 * reply direction packets for connections that have been marked
2748 * for deletion (bit 0 of ct_label is set).
2749 *
2750 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 2751 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
b73db61d 2752 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
cc58e1f2 2753 "drop;");
880fcd14 2754 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
b73db61d 2755 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
cc58e1f2 2756 "drop;");
78aab811
JP
2757
2758 /* Ingress and Egress ACL Table (Priority 65535).
2759 *
cc58e1f2
RB
2760 * Allow reply traffic that is part of an established
2761 * conntrack entry that has not been marked for deletion
2762 * (bit 0 of ct_label). We only match traffic in the
2763 * reply direction because we want traffic in the request
2764 * direction to hit the currently defined policy from ACLs.
2765 *
2766 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 2767 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2 2768 "ct.est && !ct.rel && !ct.new && !ct.inv "
b73db61d 2769 "&& ct.rpl && ct_label.blocked == 0",
78aab811 2770 "next;");
880fcd14 2771 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2 2772 "ct.est && !ct.rel && !ct.new && !ct.inv "
b73db61d 2773 "&& ct.rpl && ct_label.blocked == 0",
78aab811
JP
2774 "next;");
2775
2776 /* Ingress and Egress ACL Table (Priority 65535).
2777 *
cc58e1f2
RB
2778 * Allow traffic that is related to an existing conntrack entry that
2779 * has not been marked for deletion (bit 0 of ct_label).
2780 *
2781 * This is enforced at a higher priority than ACLs can be defined.
78aab811
JP
2782 *
2783 * NOTE: This does not support related data sessions (eg,
2784 * a dynamically negotiated FTP data channel), but will allow
2785 * related traffic such as an ICMP Port Unreachable through
2786 * that's generated from a non-listening UDP port. */
880fcd14 2787 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2 2788 "!ct.est && ct.rel && !ct.new && !ct.inv "
b73db61d 2789 "&& ct_label.blocked == 0",
78aab811 2790 "next;");
880fcd14 2791 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2 2792 "!ct.est && ct.rel && !ct.new && !ct.inv "
b73db61d 2793 "&& ct_label.blocked == 0",
78aab811 2794 "next;");
e75451fe
ZKL
2795
2796 /* Ingress and Egress ACL Table (Priority 65535).
2797 *
2798 * Not to do conntrack on ND packets. */
2799 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
2800 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
78aab811
JP
2801 }
2802
2803 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
2804 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2805 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 2806 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 2807 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811 2808
cc58e1f2
RB
2809 if (!strcmp(acl->action, "allow")
2810 || !strcmp(acl->action, "allow-related")) {
78aab811
JP
2811 /* If there are any stateful flows, we must even commit "allow"
2812 * actions. This is because, while the initiater's
2813 * direction may not have any stateful rules, the server's
2814 * may and then its return traffic would not have an
2815 * associated conntrack entry and would return "+invalid". */
cc58e1f2
RB
2816 if (!has_stateful) {
2817 ovn_lflow_add(lflows, od, stage,
2818 acl->priority + OVN_ACL_PRI_OFFSET,
2819 acl->match, "next;");
2820 } else {
2821 struct ds match = DS_EMPTY_INITIALIZER;
2822
2823 /* Commit the connection tracking entry if it's a new
2824 * connection that matches this ACL. After this commit,
2825 * the reply traffic is allowed by a flow we create at
2826 * priority 65535, defined earlier.
2827 *
2828 * It's also possible that a known connection was marked for
2829 * deletion after a policy was deleted, but the policy was
2830 * re-added while that connection is still known. We catch
b73db61d 2831 * that case here and un-set ct_label.blocked (which will be done
cc58e1f2
RB
2832 * by ct_commit in the "stateful" stage) to indicate that the
2833 * connection should be allowed to resume.
2834 */
2835 ds_put_format(&match, "((ct.new && !ct.est)"
2836 " || (!ct.new && ct.est && !ct.rpl "
b73db61d 2837 "&& ct_label.blocked == 1)) "
cc58e1f2
RB
2838 "&& (%s)", acl->match);
2839 ovn_lflow_add(lflows, od, stage,
2840 acl->priority + OVN_ACL_PRI_OFFSET,
2841 ds_cstr(&match),
2842 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2843
2844 /* Match on traffic in the request direction for an established
2845 * connection tracking entry that has not been marked for
2846 * deletion. There is no need to commit here, so we can just
2847 * proceed to the next table. We use this to ensure that this
2848 * connection is still allowed by the currently defined
2849 * policy. */
2850 ds_clear(&match);
2851 ds_put_format(&match,
2852 "!ct.new && ct.est && !ct.rpl"
b73db61d 2853 " && ct_label.blocked == 0 && (%s)",
cc58e1f2
RB
2854 acl->match);
2855 ovn_lflow_add(lflows, od, stage,
2856 acl->priority + OVN_ACL_PRI_OFFSET,
2857 ds_cstr(&match), "next;");
2858
2859 ds_destroy(&match);
2860 }
2861 } else if (!strcmp(acl->action, "drop")
2862 || !strcmp(acl->action, "reject")) {
78aab811
JP
2863 struct ds match = DS_EMPTY_INITIALIZER;
2864
cc58e1f2
RB
2865 /* XXX Need to support "reject", treat it as "drop;" for now. */
2866 if (!strcmp(acl->action, "reject")) {
2867 VLOG_INFO("reject is not a supported action");
2868 }
78aab811 2869
cc58e1f2
RB
2870 /* The implementation of "drop" differs if stateful ACLs are in
2871 * use for this datapath. In that case, the actions differ
2872 * depending on whether the connection was previously committed
2873 * to the connection tracker with ct_commit. */
2874 if (has_stateful) {
2875 /* If the packet is not part of an established connection, then
2876 * we can simply drop it. */
2877 ds_put_format(&match,
b73db61d 2878 "(!ct.est || (ct.est && ct_label.blocked == 1)) "
cc58e1f2
RB
2879 "&& (%s)",
2880 acl->match);
2881 ovn_lflow_add(lflows, od, stage, acl->priority +
2882 OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");
2883
2884 /* For an existing connection without ct_label set, we've
2885 * encountered a policy change. ACLs previously allowed
2886 * this connection and we committed the connection tracking
2887 * entry. Current policy says that we should drop this
2888 * connection. First, we set bit 0 of ct_label to indicate
2889 * that this connection is set for deletion. By not
2890 * specifying "next;", we implicitly drop the packet after
2891 * updating conntrack state. We would normally defer
2892 * ct_commit() to the "stateful" stage, but since we're
2893 * dropping the packet, we go ahead and do it here. */
2894 ds_clear(&match);
2895 ds_put_format(&match,
b73db61d 2896 "ct.est && ct_label.blocked == 0 && (%s)",
cc58e1f2
RB
2897 acl->match);
2898 ovn_lflow_add(lflows, od, stage,
2899 acl->priority + OVN_ACL_PRI_OFFSET,
2900 ds_cstr(&match), "ct_commit(ct_label=1/1);");
2901
2902 ds_destroy(&match);
2903 } else {
2904 /* There are no stateful ACLs in use on this datapath,
2905 * so a "drop" ACL is simply the "drop" logical flow action
2906 * in all cases. */
2907 ovn_lflow_add(lflows, od, stage,
2908 acl->priority + OVN_ACL_PRI_OFFSET,
2909 acl->match, "drop;");
2360b854 2910 ds_destroy(&match);
cc58e1f2 2911 }
78aab811
JP
2912 }
2913 }
281977f7
NS
2914
2915 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
2916 * logical ports of the datapath if the CMS has configured DHCPv4 options*/
052fa3ac
BP
2917 for (size_t i = 0; i < od->nbs->n_ports; i++) {
2918 if (od->nbs->ports[i]->dhcpv4_options) {
2919 const char *server_id = smap_get(
2920 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
2921 const char *server_mac = smap_get(
2922 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
2923 const char *lease_time = smap_get(
2924 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
b89d25e5 2925 if (server_id && server_mac && lease_time) {
052fa3ac
BP
2926 struct ds match = DS_EMPTY_INITIALIZER;
2927 const char *actions =
2928 has_stateful ? "ct_commit; next;" : "next;";
2929 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
2930 "&& ip4.src == %s && udp && udp.src == 67 "
2931 "&& udp.dst == 68", od->nbs->ports[i]->name,
2932 server_mac, server_id);
2933 ovn_lflow_add(
2934 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
2935 actions);
75e82c17 2936 ds_destroy(&match);
281977f7 2937 }
052fa3ac 2938 }
33ac3c83 2939
052fa3ac
BP
2940 if (od->nbs->ports[i]->dhcpv6_options) {
2941 const char *server_mac = smap_get(
2942 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
2943 struct eth_addr ea;
2944 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
2945 /* Get the link local IP of the DHCPv6 server from the
2946 * server MAC. */
2947 struct in6_addr lla;
2948 in6_generate_lla(ea, &lla);
2949
2950 char server_ip[INET6_ADDRSTRLEN + 1];
2951 ipv6_string_mapped(server_ip, &lla);
2952
2953 struct ds match = DS_EMPTY_INITIALIZER;
2954 const char *actions = has_stateful ? "ct_commit; next;" :
2955 "next;";
2956 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
2957 "&& ip6.src == %s && udp && udp.src == 547 "
2958 "&& udp.dst == 546", od->nbs->ports[i]->name,
2959 server_mac, server_ip);
2960 ovn_lflow_add(
2961 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
2962 actions);
75e82c17 2963 ds_destroy(&match);
33ac3c83 2964 }
281977f7
NS
2965 }
2966 }
78aab811
JP
2967}
2968
1a03fc7d
BS
2969static void
2970build_qos(struct ovn_datapath *od, struct hmap *lflows) {
2971 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
2972 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
2973
2974 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
2975 struct nbrec_qos *qos = od->nbs->qos_rules[i];
2976 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
2977 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
2978
2979 if (!strcmp(qos->key_action, "dscp")) {
2980 struct ds dscp_action = DS_EMPTY_INITIALIZER;
2981
2982 ds_put_format(&dscp_action, "ip.dscp = %d; next;",
2983 (uint8_t)qos->value_action);
2984 ovn_lflow_add(lflows, od, stage,
2985 qos->priority,
2986 qos->match, ds_cstr(&dscp_action));
2987 ds_destroy(&dscp_action);
2988 }
2989 }
2990}
2991
7a15be69
GS
2992static void
2993build_lb(struct ovn_datapath *od, struct hmap *lflows)
2994{
2995 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
2996 * default. */
2997 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
2998 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
2999
3000 if (od->nbs->load_balancer) {
3001 /* Ingress and Egress LB Table (Priority 65535).
3002 *
3003 * Send established traffic through conntrack for just NAT. */
3004 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
3005 "ct.est && !ct.rel && !ct.new && !ct.inv",
3006 REGBIT_CONNTRACK_NAT" = 1; next;");
3007 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
3008 "ct.est && !ct.rel && !ct.new && !ct.inv",
3009 REGBIT_CONNTRACK_NAT" = 1; next;");
3010 }
3011}
3012
fa313a8c
GS
/* Builds the "stateful" stage flows for logical switch 'od': default
 * allows, conntrack commit flows driven by REGBIT_CONNTRACK_COMMIT,
 * NAT-only flows driven by REGBIT_CONNTRACK_NAT, and per-VIP ct_lb
 * flows for each attached load balancer. */
static void
build_stateful(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Ingress and Egress stateful Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");

    /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
     * committed to conntrack. We always set ct_label.blocked to 0 here as
     * any packet that makes it this far is part of a connection we
     * want to allow to continue. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");

    /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
     * through nat (without committing).
     *
     * REGBIT_CONNTRACK_COMMIT is set for new connections and
     * REGBIT_CONNTRACK_NAT is set for established connections. So they
     * don't overlap.
     */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");

    /* Load balancing rules for new connections get committed to conntrack
     * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
     * a higher priority rule for load balancing below also commits the
     * connection, so it is okay if we do not hit the above match on
     * REGBIT_CONNTRACK_COMMIT. */
    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
        struct smap *vips = &lb->vips;
        struct smap_node *node;

        /* Each smap entry maps a VIP ("IP" or "IP:port") to its backend
         * list; one ct_lb flow is emitted per VIP. */
        SMAP_FOR_EACH (node, vips) {
            uint16_t port = 0;

            /* node->key contains IP:port or just IP.
             * ip_address is heap-allocated on success and must be freed;
             * it stays NULL if the key cannot be parsed. */
            char *ip_address = NULL;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
            if (!ip_address) {
                continue;
            }

            /* New connections in Ingress table.
             * NOTE(review): only ip4.dst is matched here, so IPv6 VIPs
             * appear unsupported by this code path. */
            char *action = xasprintf("ct_lb(%s);", node->value);
            struct ds match = DS_EMPTY_INITIALIZER;
            ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
            if (port) {
                /* VIP includes an L4 port: match it too (UDP if the LB
                 * says so, otherwise TCP) at a higher priority (120) than
                 * the port-less VIP flows (110). */
                if (lb->protocol && !strcmp(lb->protocol, "udp")) {
                    ds_put_format(&match, " && udp.dst == %d", port);
                } else {
                    ds_put_format(&match, " && tcp.dst == %d", port);
                }
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              120, ds_cstr(&match), action);
            } else {
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              110, ds_cstr(&match), action);
            }

            free(ip_address);
            ds_destroy(&match);
            free(action);
        }
    }
}
3085
bd39395f 3086static void
9975d7be
BP
3087build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
3088 struct hmap *lflows, struct hmap *mcgroups)
bd39395f 3089{
5cff6b99
BP
3090 /* This flow table structure is documented in ovn-northd(8), so please
3091 * update ovn-northd.8.xml if you change anything. */
3092
09b39248
JP
3093 struct ds match = DS_EMPTY_INITIALIZER;
3094 struct ds actions = DS_EMPTY_INITIALIZER;
3095
9975d7be 3096 /* Build pre-ACL and ACL tables for both ingress and egress.
1a03fc7d 3097 * Ingress tables 3 through 9. Egress tables 0 through 6. */
5868eb24
BP
3098 struct ovn_datapath *od;
3099 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3100 if (!od->nbs) {
3101 continue;
3102 }
3103
9ab989b7 3104 build_pre_acls(od, lflows);
7a15be69 3105 build_pre_lb(od, lflows);
facf8652 3106 build_pre_stateful(od, lflows);
2d018f9b 3107 build_acls(od, lflows);
1a03fc7d 3108 build_qos(od, lflows);
7a15be69 3109 build_lb(od, lflows);
fa313a8c 3110 build_stateful(od, lflows);
9975d7be
BP
3111 }
3112
3113 /* Logical switch ingress table 0: Admission control framework (priority
3114 * 100). */
3115 HMAP_FOR_EACH (od, key_node, datapaths) {
3116 if (!od->nbs) {
3117 continue;
3118 }
3119
bd39395f 3120 /* Logical VLANs not supported. */
685f4dfe 3121 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
091e3af9 3122 "drop;");
bd39395f
BP
3123
3124 /* Broadcast/multicast source address is invalid. */
685f4dfe 3125 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
091e3af9 3126 "drop;");
bd39395f 3127
35060cdc
BP
3128 /* Port security flows have priority 50 (see below) and will continue
3129 * to the next table if packet source is acceptable. */
bd39395f
BP
3130 }
3131
685f4dfe
NS
3132 /* Logical switch ingress table 0: Ingress port security - L2
3133 * (priority 50).
3134 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
3135 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
3136 */
5868eb24
BP
3137 struct ovn_port *op;
3138 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3139 if (!op->nbsp) {
9975d7be
BP
3140 continue;
3141 }
3142
0ee00741 3143 if (!lsp_is_enabled(op->nbsp)) {
96af668a
BP
3144 /* Drop packets from disabled logical ports (since logical flow
3145 * tables are default-drop). */
3146 continue;
3147 }
3148
09b39248 3149 ds_clear(&match);
a6095f81 3150 ds_clear(&actions);
9975d7be 3151 ds_put_format(&match, "inport == %s", op->json_key);
e93b43d6
JP
3152 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
3153 &match);
a6095f81
BS
3154
3155 const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
3156 if (queue_id) {
3157 ds_put_format(&actions, "set_queue(%s); ", queue_id);
3158 }
3159 ds_put_cstr(&actions, "next;");
685f4dfe 3160 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
a6095f81 3161 ds_cstr(&match), ds_cstr(&actions));
685f4dfe 3162
0ee00741 3163 if (op->nbsp->n_port_security) {
685f4dfe
NS
3164 build_port_security_ip(P_IN, op, lflows);
3165 build_port_security_nd(op, lflows);
3166 }
3167 }
3168
3169 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
3170 * (priority 0)*/
3171 HMAP_FOR_EACH (od, key_node, datapaths) {
3172 if (!od->nbs) {
3173 continue;
3174 }
3175
3176 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
3177 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
5868eb24 3178 }
445a266a 3179
1a03fc7d 3180 /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
0b077281
RR
3181 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
3182 * rationale. */
fa128126 3183 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3184 if (!op->nbsp) {
fa128126
HZ
3185 continue;
3186 }
3187
0b077281
RR
3188 if ((!strcmp(op->nbsp->type, "localnet")) ||
3189 (!strcmp(op->nbsp->type, "vtep"))) {
09b39248
JP
3190 ds_clear(&match);
3191 ds_put_format(&match, "inport == %s", op->json_key);
e75451fe 3192 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
09b39248 3193 ds_cstr(&match), "next;");
fa128126
HZ
3194 }
3195 }
3196
1a03fc7d 3197 /* Ingress table 10: ARP/ND responder, reply for known IPs.
fa128126 3198 * (priority 50). */
57d143eb 3199 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3200 if (!op->nbsp) {
57d143eb
HZ
3201 continue;
3202 }
3203
4c7bf534 3204 /*
e75451fe 3205 * Add ARP/ND reply flows if either the
4c7bf534
NS
3206 * - port is up or
3207 * - port type is router
3208 */
0ee00741 3209 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
4c7bf534
NS
3210 continue;
3211 }
3212
e93b43d6
JP
3213 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
3214 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
09b39248 3215 ds_clear(&match);
e93b43d6
JP
3216 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
3217 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
09b39248
JP
3218 ds_clear(&actions);
3219 ds_put_format(&actions,
57d143eb 3220 "eth.dst = eth.src; "
e93b43d6 3221 "eth.src = %s; "
57d143eb
HZ
3222 "arp.op = 2; /* ARP reply */ "
3223 "arp.tha = arp.sha; "
e93b43d6 3224 "arp.sha = %s; "
57d143eb 3225 "arp.tpa = arp.spa; "
e93b43d6 3226 "arp.spa = %s; "
57d143eb 3227 "outport = inport; "
bf143492 3228 "flags.loopback = 1; "
57d143eb 3229 "output;",
e93b43d6
JP
3230 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
3231 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
e75451fe 3232 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 3233 ds_cstr(&match), ds_cstr(&actions));
9fcb6a18
BP
3234
3235 /* Do not reply to an ARP request from the port that owns the
3236 * address (otherwise a DHCP client that ARPs to check for a
3237 * duplicate address will fail). Instead, forward it the usual
3238 * way.
3239 *
3240 * (Another alternative would be to simply drop the packet. If
3241 * everything is working as it is configured, then this would
3242 * produce equivalent results, since no one should reply to the
3243 * request. But ARPing for one's own IP address is intended to
3244 * detect situations where the network is not working as
3245 * configured, so dropping the request would frustrate that
3246 * intent.) */
3247 ds_put_format(&match, " && inport == %s", op->json_key);
3248 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3249 ds_cstr(&match), "next;");
57d143eb 3250 }
7dc88496 3251
6fdb7cd6
JP
3252 /* For ND solicitations, we need to listen for both the
3253 * unicast IPv6 address and its all-nodes multicast address,
3254 * but always respond with the unicast IPv6 address. */
3255 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
09b39248 3256 ds_clear(&match);
6fdb7cd6
JP
3257 ds_put_format(&match,
3258 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
3259 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3260 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
3261 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
3262
09b39248
JP
3263 ds_clear(&actions);
3264 ds_put_format(&actions,
6fdb7cd6
JP
3265 "nd_na { "
3266 "eth.src = %s; "
3267 "ip6.src = %s; "
3268 "nd.target = %s; "
3269 "nd.tll = %s; "
3270 "outport = inport; "
bf143492 3271 "flags.loopback = 1; "
6fdb7cd6
JP
3272 "output; "
3273 "};",
3274 op->lsp_addrs[i].ea_s,
3275 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3276 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
3277 op->lsp_addrs[i].ea_s);
e75451fe 3278 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 3279 ds_cstr(&match), ds_cstr(&actions));
9fcb6a18
BP
3280
3281 /* Do not reply to a solicitation from the port that owns the
3282 * address (otherwise DAD detection will fail). */
3283 ds_put_format(&match, " && inport == %s", op->json_key);
3284 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
3285 ds_cstr(&match), "next;");
e75451fe 3286 }
57d143eb
HZ
3287 }
3288 }
3289
1a03fc7d 3290 /* Ingress table 10: ARP/ND responder, by default goto next.
fa128126
HZ
3291 * (priority 0)*/
3292 HMAP_FOR_EACH (od, key_node, datapaths) {
3293 if (!od->nbs) {
3294 continue;
3295 }
3296
e75451fe 3297 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
fa128126
HZ
3298 }
3299
1a03fc7d 3300 /* Logical switch ingress table 11 and 12: DHCP options and response
281977f7
NS
3301 * priority 100 flows. */
3302 HMAP_FOR_EACH (op, key_node, ports) {
3303 if (!op->nbsp) {
3304 continue;
3305 }
3306
3307 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
3308 /* Don't add the DHCP flows if the port is not enabled or if the
3309 * port is a router port. */
3310 continue;
3311 }
3312
33ac3c83
NS
3313 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
3314 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
3315 */
281977f7
NS
3316 continue;
3317 }
3318
3319 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
3320 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
3321 struct ds options_action = DS_EMPTY_INITIALIZER;
3322 struct ds response_action = DS_EMPTY_INITIALIZER;
213615b3 3323 struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
281977f7
NS
3324 if (build_dhcpv4_action(
3325 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
213615b3 3326 &options_action, &response_action, &ipv4_addr_match)) {
281977f7
NS
3327 struct ds match = DS_EMPTY_INITIALIZER;
3328 ds_put_format(
3329 &match, "inport == %s && eth.src == %s && "
3330 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
3331 "udp.src == 68 && udp.dst == 67", op->json_key,
3332 op->lsp_addrs[i].ea_s);
3333
3334 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3335 100, ds_cstr(&match),
3336 ds_cstr(&options_action));
213615b3
NS
3337 ds_clear(&match);
3338 /* Allow ip4.src = OFFER_IP and
3339 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
3340 * cases
3341 * - When the client wants to renew the IP by sending
3342 * the DHCPREQUEST to the server ip.
3343 * - When the client wants to renew the IP by
3344 * broadcasting the DHCPREQUEST.
3345 */
3346 ds_put_format(
3347 &match, "inport == %s && eth.src == %s && "
3348 "%s && udp.src == 68 && udp.dst == 67", op->json_key,
3349 op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));
3350
3351 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
3352 100, ds_cstr(&match),
3353 ds_cstr(&options_action));
3354 ds_clear(&match);
3355
281977f7 3356 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
213615b3
NS
3357 * put_dhcp_opts action is successful. */
3358 ds_put_format(
3359 &match, "inport == %s && eth.src == %s && "
3360 "ip4 && udp.src == 68 && udp.dst == 67"
3361 " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
3362 op->lsp_addrs[i].ea_s);
281977f7
NS
3363 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
3364 100, ds_cstr(&match),
3365 ds_cstr(&response_action));
3366 ds_destroy(&match);
3367 ds_destroy(&options_action);
3368 ds_destroy(&response_action);
213615b3 3369 ds_destroy(&ipv4_addr_match);
281977f7
NS
3370 break;
3371 }
3372 }
33ac3c83
NS
3373
3374 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3375 struct ds options_action = DS_EMPTY_INITIALIZER;
3376 struct ds response_action = DS_EMPTY_INITIALIZER;
3377 if (build_dhcpv6_action(
3378 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
3379 &options_action, &response_action)) {
3380 struct ds match = DS_EMPTY_INITIALIZER;
3381 ds_put_format(
3382 &match, "inport == %s && eth.src == %s"
3383 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
3384 " udp.dst == 547", op->json_key,
3385 op->lsp_addrs[i].ea_s);
3386
3387 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
3388 ds_cstr(&match), ds_cstr(&options_action));
3389
3390 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
3391 * put_dhcpv6_opts action is successful */
3392 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
3393 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
3394 ds_cstr(&match), ds_cstr(&response_action));
3395 ds_destroy(&match);
3396 ds_destroy(&options_action);
3397 ds_destroy(&response_action);
3398 break;
3399 }
3400 }
281977f7
NS
3401 }
3402 }
3403
1a03fc7d 3404 /* Ingress table 11 and 12: DHCP options and response, by default goto next.
281977f7
NS
3405 * (priority 0). */
3406
3407 HMAP_FOR_EACH (od, key_node, datapaths) {
3408 if (!od->nbs) {
3409 continue;
3410 }
3411
3412 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
3413 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
3414 }
3415
1a03fc7d 3416 /* Ingress table 13: Destination lookup, broadcast and multicast handling
5868eb24
BP
3417 * (priority 100). */
3418 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3419 if (!op->nbsp) {
9975d7be
BP
3420 continue;
3421 }
3422
0ee00741 3423 if (lsp_is_enabled(op->nbsp)) {
9975d7be 3424 ovn_multicast_add(mcgroups, &mc_flood, op);
445a266a 3425 }
5868eb24
BP
3426 }
3427 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3428 if (!od->nbs) {
3429 continue;
3430 }
3431
3432 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
5868eb24 3433 "outport = \""MC_FLOOD"\"; output;");
bd39395f 3434 }
bd39395f 3435
1a03fc7d 3436 /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
5868eb24 3437 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3438 if (!op->nbsp) {
9975d7be
BP
3439 continue;
3440 }
3441
0ee00741 3442 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
10c3fcdf 3443 /* Addresses are owned by the logical port.
3444 * Ethernet address followed by zero or more IPv4
3445 * or IPv6 addresses (or both). */
74ff3298 3446 struct eth_addr mac;
10c3fcdf 3447 if (ovs_scan(op->nbsp->addresses[i],
3448 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
09b39248 3449 ds_clear(&match);
9975d7be
BP
3450 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3451 ETH_ADDR_ARGS(mac));
5868eb24 3452
09b39248 3453 ds_clear(&actions);
9975d7be
BP
3454 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3455 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
5868eb24 3456 ds_cstr(&match), ds_cstr(&actions));
0ee00741
HK
3457 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
3458 if (lsp_is_enabled(op->nbsp)) {
9975d7be 3459 ovn_multicast_add(mcgroups, &mc_unknown, op);
96af668a
BP
3460 op->od->has_unknown = true;
3461 }
6374d518 3462 } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
8639f9be 3463 if (!op->nbsp->dynamic_addresses
10c3fcdf 3464 || !ovs_scan(op->nbsp->dynamic_addresses,
3465 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
8639f9be
ND
3466 continue;
3467 }
3468 ds_clear(&match);
3469 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3470 ETH_ADDR_ARGS(mac));
3471
3472 ds_clear(&actions);
3473 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3474 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
20418099
MS
3475 ds_cstr(&match), ds_cstr(&actions));
3476 } else if (!strcmp(op->nbsp->addresses[i], "router")) {
3477 if (!op->peer || !op->peer->nbrp
3478 || !ovs_scan(op->peer->nbrp->mac,
3479 ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
3480 continue;
3481 }
3482 ds_clear(&match);
3483 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
3484 ETH_ADDR_ARGS(mac));
41a15b71
MS
3485 if (op->peer->od->l3dgw_port
3486 && op->peer == op->peer->od->l3dgw_port
3487 && op->peer->od->l3redirect_port) {
3488 /* The destination lookup flow for the router's
3489 * distributed gateway port MAC address should only be
3490 * programmed on the "redirect-chassis". */
3491 ds_put_format(&match, " && is_chassis_resident(%s)",
3492 op->peer->od->l3redirect_port->json_key);
3493 }
20418099
MS
3494
3495 ds_clear(&actions);
3496 ds_put_format(&actions, "outport = %s; output;", op->json_key);
3497 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
8639f9be 3498 ds_cstr(&match), ds_cstr(&actions));
06a26dd2
MS
3499
3500 /* Add ethernet addresses specified in NAT rules on
3501 * distributed logical routers. */
3502 if (op->peer->od->l3dgw_port
3503 && op->peer == op->peer->od->l3dgw_port) {
3504 for (int i = 0; i < op->peer->od->nbr->n_nat; i++) {
3505 const struct nbrec_nat *nat
3506 = op->peer->od->nbr->nat[i];
3507 if (!strcmp(nat->type, "dnat_and_snat")
3508 && nat->logical_port && nat->external_mac
3509 && eth_addr_from_string(nat->external_mac, &mac)) {
3510
3511 ds_clear(&match);
3512 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
3513 " && is_chassis_resident(\"%s\")",
3514 ETH_ADDR_ARGS(mac),
3515 nat->logical_port);
3516
3517 ds_clear(&actions);
3518 ds_put_format(&actions, "outport = %s; output;",
3519 op->json_key);
3520 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
3521 50, ds_cstr(&match),
3522 ds_cstr(&actions));
3523 }
3524 }
3525 }
5868eb24
BP
3526 } else {
3527 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
445a266a 3528
2fa326a3
BP
3529 VLOG_INFO_RL(&rl,
3530 "%s: invalid syntax '%s' in addresses column",
0ee00741 3531 op->nbsp->name, op->nbsp->addresses[i]);
445a266a
BP
3532 }
3533 }
bd39395f
BP
3534 }
3535
1a03fc7d 3536 /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
5868eb24 3537 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3538 if (!od->nbs) {
3539 continue;
3540 }
3541
5868eb24 3542 if (od->has_unknown) {
9975d7be 3543 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
5868eb24 3544 "outport = \""MC_UNKNOWN"\"; output;");
445a266a 3545 }
bd39395f
BP
3546 }
3547
94300e09
JP
3548 /* Egress tables 6: Egress port security - IP (priority 0)
3549 * Egress table 7: Egress port security L2 - multicast/broadcast
3550 * (priority 100). */
5868eb24 3551 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3552 if (!od->nbs) {
3553 continue;
3554 }
3555
685f4dfe
NS
3556 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
3557 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
091e3af9 3558 "output;");
48f42f3a
RB
3559 }
3560
94300e09 3561 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
685f4dfe
NS
3562 * if port security enabled.
3563 *
94300e09 3564 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
d770a830
BP
3565 *
3566 * Priority 50 rules implement port security for enabled logical port.
3567 *
3568 * Priority 150 rules drop packets to disabled logical ports, so that they
3569 * don't even receive multicast or broadcast packets. */
5868eb24 3570 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3571 if (!op->nbsp) {
9975d7be
BP
3572 continue;
3573 }
3574
09b39248 3575 ds_clear(&match);
9975d7be 3576 ds_put_format(&match, "outport == %s", op->json_key);
0ee00741 3577 if (lsp_is_enabled(op->nbsp)) {
e93b43d6
JP
3578 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
3579 &match);
685f4dfe 3580 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
d770a830
BP
3581 ds_cstr(&match), "output;");
3582 } else {
685f4dfe 3583 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
d770a830
BP
3584 ds_cstr(&match), "drop;");
3585 }
eb00399e 3586
0ee00741 3587 if (op->nbsp->n_port_security) {
685f4dfe
NS
3588 build_port_security_ip(P_OUT, op, lflows);
3589 }
eb00399e 3590 }
09b39248
JP
3591
3592 ds_destroy(&match);
3593 ds_destroy(&actions);
9975d7be 3594}
eb00399e 3595
9975d7be
BP
3596static bool
3597lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
3598{
3599 return !lrport->enabled || *lrport->enabled;
3600}
3601
4685e523
JP
3602/* Returns a string of the IP address of the router port 'op' that
3603 * overlaps with 'ip_s". If one is not found, returns NULL.
3604 *
3605 * The caller must not free the returned string. */
3606static const char *
3607find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
3608{
6fdb7cd6 3609 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4685e523 3610
6fdb7cd6
JP
3611 if (is_ipv4) {
3612 ovs_be32 ip;
4685e523 3613
6fdb7cd6
JP
3614 if (!ip_parse(ip_s, &ip)) {
3615 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3616 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
3617 return NULL;
3618 }
4685e523 3619
6fdb7cd6
JP
3620 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3621 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
3622
3623 if (!((na->network ^ ip) & na->mask)) {
3624 /* There should be only 1 interface that matches the
3625 * supplied IP. Otherwise, it's a configuration error,
3626 * because subnets of a router's interfaces should NOT
3627 * overlap. */
3628 return na->addr_s;
3629 }
3630 }
3631 } else {
3632 struct in6_addr ip6;
3633
3634 if (!ipv6_parse(ip_s, &ip6)) {
3635 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3636 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
3637 return NULL;
3638 }
3639
3640 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3641 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
3642 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
3643 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
3644
3645 if (ipv6_is_zero(&and_addr)) {
3646 /* There should be only 1 interface that matches the
3647 * supplied IP. Otherwise, it's a configuration error,
3648 * because subnets of a router's interfaces should NOT
3649 * overlap. */
3650 return na->addr_s;
3651 }
4685e523
JP
3652 }
3653 }
3654
3655 return NULL;
3656}
3657
9975d7be 3658static void
0bac7164 3659add_route(struct hmap *lflows, const struct ovn_port *op,
4685e523 3660 const char *lrp_addr_s, const char *network_s, int plen,
440a9f4b 3661 const char *gateway, const char *policy)
9975d7be 3662{
6fdb7cd6 3663 bool is_ipv4 = strchr(network_s, '.') ? true : false;
a63f7235 3664 struct ds match = DS_EMPTY_INITIALIZER;
440a9f4b
GS
3665 const char *dir;
3666 uint16_t priority;
3667
3668 if (policy && !strcmp(policy, "src-ip")) {
3669 dir = "src";
3670 priority = plen * 2;
3671 } else {
3672 dir = "dst";
3673 priority = (plen * 2) + 1;
3674 }
6fdb7cd6 3675
a63f7235
JP
3676 /* IPv6 link-local addresses must be scoped to the local router port. */
3677 if (!is_ipv4) {
3678 struct in6_addr network;
3679 ovs_assert(ipv6_parse(network_s, &network));
3680 if (in6_is_lla(&network)) {
3681 ds_put_format(&match, "inport == %s && ", op->json_key);
3682 }
3683 }
440a9f4b 3684 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
a63f7235 3685 network_s, plen);
9975d7be
BP
3686
3687 struct ds actions = DS_EMPTY_INITIALIZER;
6fdb7cd6
JP
3688 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
3689
9975d7be 3690 if (gateway) {
c9bdf7bd 3691 ds_put_cstr(&actions, gateway);
9975d7be 3692 } else {
6fdb7cd6 3693 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
9975d7be 3694 }
4685e523 3695 ds_put_format(&actions, "; "
6fdb7cd6 3696 "%sreg1 = %s; "
4685e523 3697 "eth.src = %s; "
0bac7164 3698 "outport = %s; "
bf143492 3699 "flags.loopback = 1; "
0bac7164 3700 "next;",
6fdb7cd6 3701 is_ipv4 ? "" : "xx",
4685e523
JP
3702 lrp_addr_s,
3703 op->lrp_networks.ea_s,
3704 op->json_key);
9975d7be
BP
3705
3706 /* The priority here is calculated to implement longest-prefix-match
3707 * routing. */
440a9f4b 3708 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
a63f7235
JP
3709 ds_cstr(&match), ds_cstr(&actions));
3710 ds_destroy(&match);
9975d7be 3711 ds_destroy(&actions);
9975d7be
BP
3712}
3713
28dc3fe9
SR
3714static void
3715build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
3716 struct hmap *ports,
3717 const struct nbrec_logical_router_static_route *route)
3718{
6fdb7cd6 3719 ovs_be32 nexthop;
4685e523 3720 const char *lrp_addr_s;
6fdb7cd6
JP
3721 unsigned int plen;
3722 bool is_ipv4;
28dc3fe9 3723
6fdb7cd6
JP
3724 /* Verify that the next hop is an IP address with an all-ones mask. */
3725 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
3726 if (!error) {
3727 if (plen != 32) {
3728 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3729 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3730 return;
3731 }
3732 is_ipv4 = true;
3733 } else {
28dc3fe9 3734 free(error);
6fdb7cd6
JP
3735
3736 struct in6_addr ip6;
3737 char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
3738 if (!error) {
3739 if (plen != 128) {
3740 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3741 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3742 return;
3743 }
3744 is_ipv4 = false;
3745 } else {
3746 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3747 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
3748 free(error);
3749 return;
3750 }
28dc3fe9
SR
3751 }
3752
6fdb7cd6
JP
3753 char *prefix_s;
3754 if (is_ipv4) {
3755 ovs_be32 prefix;
3756 /* Verify that ip prefix is a valid IPv4 address. */
3757 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
3758 if (error) {
3759 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3760 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3761 route->ip_prefix);
3762 free(error);
3763 return;
3764 }
3765 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
3766 } else {
3767 /* Verify that ip prefix is a valid IPv6 address. */
3768 struct in6_addr prefix;
3769 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
3770 if (error) {
3771 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3772 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3773 route->ip_prefix);
3774 free(error);
3775 return;
3776 }
3777 struct in6_addr mask = ipv6_create_mask(plen);
3778 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
3779 prefix_s = xmalloc(INET6_ADDRSTRLEN);
3780 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
28dc3fe9
SR
3781 }
3782
3783 /* Find the outgoing port. */
3784 struct ovn_port *out_port = NULL;
3785 if (route->output_port) {
3786 out_port = ovn_port_find(ports, route->output_port);
3787 if (!out_port) {
3788 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3789 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
3790 route->output_port, route->ip_prefix);
6fdb7cd6 3791 goto free_prefix_s;
28dc3fe9 3792 }
4685e523 3793 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
28dc3fe9
SR
3794 } else {
3795 /* output_port is not specified, find the
3796 * router port matching the next hop. */
3797 int i;
3798 for (i = 0; i < od->nbr->n_ports; i++) {
3799 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
3800 out_port = ovn_port_find(ports, lrp->name);
3801 if (!out_port) {
3802 /* This should not happen. */
3803 continue;
3804 }
3805
4685e523
JP
3806 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3807 if (lrp_addr_s) {
28dc3fe9
SR
3808 break;
3809 }
3810 }
28dc3fe9
SR
3811 }
3812
4685e523
JP
3813 if (!lrp_addr_s) {
3814 /* There is no matched out port. */
3815 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3816 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
3817 route->ip_prefix, route->nexthop);
6fdb7cd6 3818 goto free_prefix_s;
4685e523
JP
3819 }
3820
440a9f4b
GS
3821 char *policy = route->policy ? route->policy : "dst-ip";
3822 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
3823 policy);
6fdb7cd6
JP
3824
3825free_prefix_s:
c9bdf7bd 3826 free(prefix_s);
28dc3fe9
SR
3827}
3828
4685e523 3829static void
6fdb7cd6 3830op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4685e523
JP
3831{
3832 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
3833 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
3834 return;
3835 }
3836
3837 ds_put_cstr(ds, "{");
3838 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3839 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
3840 if (add_bcast) {
3841 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
3842 }
3843 }
3844 ds_chomp(ds, ' ');
3845 ds_chomp(ds, ',');
3846 ds_put_cstr(ds, "}");
3847}
3848
6fdb7cd6
JP
3849static void
3850op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
3851{
3852 if (op->lrp_networks.n_ipv6_addrs == 1) {
3853 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
3854 return;
3855 }
3856
3857 ds_put_cstr(ds, "{");
3858 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3859 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
3860 }
3861 ds_chomp(ds, ' ');
3862 ds_chomp(ds, ',');
3863 ds_put_cstr(ds, "}");
3864}
3865
65d8810c
GS
3866static const char *
3867get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
3868{
3869 char *key = xasprintf("%s_force_snat_ip", key_type);
3870 const char *ip_address = smap_get(&od->nbr->options, key);
3871 free(key);
3872
3873 if (ip_address) {
3874 ovs_be32 mask;
3875 char *error = ip_parse_masked(ip_address, ip, &mask);
3876 if (error || mask != OVS_BE32_MAX) {
3877 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3878 VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"",
3879 ip_address, UUID_ARGS(&od->key));
3880 free(error);
3881 *ip = 0;
3882 return NULL;
3883 }
3884 return ip_address;
3885 }
3886
3887 *ip = 0;
3888 return NULL;
3889}
3890
3891static void
3892add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od,
3893 struct ds *match, struct ds *actions, int priority,
3894 const char *lb_force_snat_ip)
3895{
3896 /* A match and actions for new connections. */
3897 char *new_match = xasprintf("ct.new && %s", ds_cstr(match));
3898 if (lb_force_snat_ip) {
3899 char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s",
3900 ds_cstr(actions));
3901 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
3902 new_actions);
3903 free(new_actions);
3904 } else {
3905 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match,
3906 ds_cstr(actions));
3907 }
3908
3909 /* A match and actions for established connections. */
3910 char *est_match = xasprintf("ct.est && %s", ds_cstr(match));
3911 if (lb_force_snat_ip) {
3912 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
3913 "flags.force_snat_for_lb = 1; ct_dnat;");
3914 } else {
3915 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match,
3916 "ct_dnat;");
3917 }
3918
3919 free(new_match);
3920 free(est_match);
3921}
3922
9975d7be
BP
3923static void
3924build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
3925 struct hmap *lflows)
3926{
3927 /* This flow table structure is documented in ovn-northd(8), so please
3928 * update ovn-northd.8.xml if you change anything. */
3929
09b39248
JP
3930 struct ds match = DS_EMPTY_INITIALIZER;
3931 struct ds actions = DS_EMPTY_INITIALIZER;
3932
9975d7be
BP
3933 /* Logical router ingress table 0: Admission control framework. */
3934 struct ovn_datapath *od;
3935 HMAP_FOR_EACH (od, key_node, datapaths) {
3936 if (!od->nbr) {
3937 continue;
3938 }
3939
3940 /* Logical VLANs not supported.
3941 * Broadcast/multicast source address is invalid. */
3942 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
3943 "vlan.present || eth.src[40]", "drop;");
3944 }
3945
3946 /* Logical router ingress table 0: match (priority 50). */
3947 struct ovn_port *op;
3948 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3949 if (!op->nbrp) {
9975d7be
BP
3950 continue;
3951 }
3952
0ee00741 3953 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
3954 /* Drop packets from disabled logical ports (since logical flow
3955 * tables are default-drop). */
3956 continue;
3957 }
3958
41a15b71
MS
3959 if (op->derived) {
3960 /* No ingress packets should be received on a chassisredirect
3961 * port. */
3962 continue;
3963 }
3964
3965 ds_clear(&match);
3966 ds_put_format(&match, "eth.mcast && inport == %s", op->json_key);
3967 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
3968 ds_cstr(&match), "next;");
3969
09b39248 3970 ds_clear(&match);
41a15b71 3971 ds_put_format(&match, "eth.dst == %s && inport == %s",
4685e523 3972 op->lrp_networks.ea_s, op->json_key);
41a15b71
MS
3973 if (op->od->l3dgw_port && op == op->od->l3dgw_port
3974 && op->od->l3redirect_port) {
3975 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
3976 * should only be received on the "redirect-chassis". */
3977 ds_put_format(&match, " && is_chassis_resident(%s)",
3978 op->od->l3redirect_port->json_key);
3979 }
9975d7be 3980 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 3981 ds_cstr(&match), "next;");
9975d7be
BP
3982 }
3983
3984 /* Logical router ingress table 1: IP Input. */
78aab811 3985 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3986 if (!od->nbr) {
3987 continue;
3988 }
3989
3990 /* L3 admission control: drop multicast and broadcast source, localhost
3991 * source or destination, and zero network source or destination
3992 * (priority 100). */
3993 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
3994 "ip4.mcast || "
3995 "ip4.src == 255.255.255.255 || "
3996 "ip4.src == 127.0.0.0/8 || "
3997 "ip4.dst == 127.0.0.0/8 || "
3998 "ip4.src == 0.0.0.0/8 || "
3999 "ip4.dst == 0.0.0.0/8",
4000 "drop;");
4001
0bac7164
BP
4002 /* ARP reply handling. Use ARP replies to populate the logical
4003 * router's ARP table. */
4004 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
4005 "put_arp(inport, arp.spa, arp.sha);");
4006
9975d7be
BP
4007 /* Drop Ethernet local broadcast. By definition this traffic should
4008 * not be forwarded.*/
4009 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
4010 "eth.bcast", "drop;");
4011
9975d7be
BP
4012 /* TTL discard.
4013 *
4014 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
4015 ds_clear(&match);
4016 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
4017 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
4018 ds_cstr(&match), "drop;");
9975d7be 4019
c34a87b6
JP
4020 /* ND advertisement handling. Use advertisements to populate
4021 * the logical router's ARP/ND table. */
4022 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
4023 "put_nd(inport, nd.target, nd.tll);");
4024
4025 /* Lean from neighbor solicitations that were not directed at
4026 * us. (A priority-90 flow will respond to requests to us and
4027 * learn the sender's mac address. */
4028 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
4029 "put_nd(inport, ip6.src, nd.sll);");
4030
9975d7be
BP
4031 /* Pass other traffic not already handled to the next table for
4032 * routing. */
4033 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
4034 }
4035
6fdb7cd6 4036 /* Logical router ingress table 1: IP Input for IPv4. */
9975d7be 4037 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4038 if (!op->nbrp) {
9975d7be
BP
4039 continue;
4040 }
4041
41a15b71
MS
4042 if (op->derived) {
4043 /* No ingress packets are accepted on a chassisredirect
4044 * port, so no need to program flows for that port. */
4045 continue;
4046 }
9975d7be 4047
6fdb7cd6
JP
4048 if (op->lrp_networks.n_ipv4_addrs) {
4049 /* L3 admission control: drop packets that originate from an
4050 * IPv4 address owned by the router or a broadcast address
4051 * known to the router (priority 100). */
4052 ds_clear(&match);
4053 ds_put_cstr(&match, "ip4.src == ");
4054 op_put_v4_networks(&match, op, true);
06a26dd2 4055 ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
6fdb7cd6
JP
4056 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4057 ds_cstr(&match), "drop;");
4058
4059 /* ICMP echo reply. These flows reply to ICMP echo requests
4060 * received for the router's IP address. Since packets only
4061 * get here as part of the logical router datapath, the inport
4062 * (i.e. the incoming locally attached net) does not matter.
4063 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
4064 ds_clear(&match);
4065 ds_put_cstr(&match, "ip4.dst == ");
4066 op_put_v4_networks(&match, op, false);
4067 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
4068
4069 ds_clear(&actions);
4070 ds_put_format(&actions,
4071 "ip4.dst <-> ip4.src; "
4072 "ip.ttl = 255; "
4073 "icmp4.type = 0; "
bf143492 4074 "flags.loopback = 1; "
6fdb7cd6
JP
4075 "next; ");
4076 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4077 ds_cstr(&match), ds_cstr(&actions));
4078 }
dd7652e6 4079
9975d7be
BP
4080 /* ARP reply. These flows reply to ARP requests for the router's own
4081 * IP address. */
4685e523
JP
4082 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4083 ds_clear(&match);
4084 ds_put_format(&match,
4085 "inport == %s && arp.tpa == %s && arp.op == 1",
4086 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
41a15b71
MS
4087 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4088 && op->od->l3redirect_port) {
4089 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4090 * should only be sent from the "redirect-chassis", so that
4091 * upstream MAC learning points to the "redirect-chassis".
4092 * Also need to avoid generation of multiple ARP responses
4093 * from different chassis. */
4094 ds_put_format(&match, " && is_chassis_resident(%s)",
4095 op->od->l3redirect_port->json_key);
4096 }
4685e523
JP
4097
4098 ds_clear(&actions);
4099 ds_put_format(&actions,
4100 "eth.dst = eth.src; "
4101 "eth.src = %s; "
4102 "arp.op = 2; /* ARP reply */ "
4103 "arp.tha = arp.sha; "
4104 "arp.sha = %s; "
4105 "arp.tpa = arp.spa; "
4106 "arp.spa = %s; "
4107 "outport = %s; "
bf143492 4108 "flags.loopback = 1; "
4685e523
JP
4109 "output;",
4110 op->lrp_networks.ea_s,
4111 op->lrp_networks.ea_s,
4112 op->lrp_networks.ipv4_addrs[i].addr_s,
4113 op->json_key);
4114 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4115 ds_cstr(&match), ds_cstr(&actions));
4116 }
9975d7be 4117
cc4583aa
GS
4118 /* A set to hold all load-balancer vips that need ARP responses. */
4119 struct sset all_ips = SSET_INITIALIZER(&all_ips);
e914fb54 4120 get_router_load_balancer_ips(op->od, &all_ips);
cc4583aa
GS
4121
4122 const char *ip_address;
4123 SSET_FOR_EACH(ip_address, &all_ips) {
4124 ovs_be32 ip;
4125 if (!ip_parse(ip_address, &ip) || !ip) {
4126 continue;
4127 }
4128
4129 ds_clear(&match);
4130 ds_put_format(&match,
4131 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
4132 op->json_key, IP_ARGS(ip));
4133
4134 ds_clear(&actions);
4135 ds_put_format(&actions,
4136 "eth.dst = eth.src; "
4137 "eth.src = %s; "
4138 "arp.op = 2; /* ARP reply */ "
4139 "arp.tha = arp.sha; "
4140 "arp.sha = %s; "
4141 "arp.tpa = arp.spa; "
4142 "arp.spa = "IP_FMT"; "
4143 "outport = %s; "
4144 "flags.loopback = 1; "
4145 "output;",
4146 op->lrp_networks.ea_s,
4147 op->lrp_networks.ea_s,
4148 IP_ARGS(ip),
4149 op->json_key);
4150 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4151 ds_cstr(&match), ds_cstr(&actions));
4152 }
4153
4154 sset_destroy(&all_ips);
4155
65d8810c
GS
4156 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
4157 * LBed traffic respectively to be SNATed. In addition, there can be
4158 * a number of SNAT rules in the NAT table. */
4159 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips *
4160 (op->od->nbr->n_nat + 2));
dde5ea7b 4161 size_t n_snat_ips = 0;
65d8810c
GS
4162
4163 ovs_be32 snat_ip;
4164 const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat",
4165 &snat_ip);
4166 if (dnat_force_snat_ip) {
4167 snat_ips[n_snat_ips++] = snat_ip;
4168 }
4169
4170 const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb",
4171 &snat_ip);
4172 if (lb_force_snat_ip) {
4173 snat_ips[n_snat_ips++] = snat_ip;
4174 }
4175
de297547
GS
4176 for (int i = 0; i < op->od->nbr->n_nat; i++) {
4177 const struct nbrec_nat *nat;
4178
4179 nat = op->od->nbr->nat[i];
4180
de297547
GS
4181 ovs_be32 ip;
4182 if (!ip_parse(nat->external_ip, &ip) || !ip) {
4183 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
dde5ea7b 4184 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
de297547
GS
4185 "for router %s", nat->external_ip, op->key);
4186 continue;
4187 }
4188
dde5ea7b
GS
4189 if (!strcmp(nat->type, "snat")) {
4190 snat_ips[n_snat_ips++] = ip;
4191 continue;
4192 }
4193
4194 /* ARP handling for external IP addresses.
4195 *
4196 * DNAT IP addresses are external IP addresses that need ARP
4197 * handling. */
09b39248
JP
4198 ds_clear(&match);
4199 ds_put_format(&match,
4200 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
4201 op->json_key, IP_ARGS(ip));
4685e523 4202
09b39248
JP
4203 ds_clear(&actions);
4204 ds_put_format(&actions,
de297547 4205 "eth.dst = eth.src; "
de297547 4206 "arp.op = 2; /* ARP reply */ "
06a26dd2
MS
4207 "arp.tha = arp.sha; ");
4208
4209 if (op->od->l3dgw_port && op == op->od->l3dgw_port) {
4210 struct eth_addr mac;
4211 if (nat->external_mac &&
4212 eth_addr_from_string(nat->external_mac, &mac)
4213 && nat->logical_port) {
4214 /* distributed NAT case, use nat->external_mac */
4215 ds_put_format(&actions,
4216 "eth.src = "ETH_ADDR_FMT"; "
4217 "arp.sha = "ETH_ADDR_FMT"; ",
4218 ETH_ADDR_ARGS(mac),
4219 ETH_ADDR_ARGS(mac));
4220 /* Traffic with eth.src = nat->external_mac should only be
4221 * sent from the chassis where nat->logical_port is
4222 * resident, so that upstream MAC learning points to the
4223 * correct chassis. Also need to avoid generation of
4224 * multiple ARP responses from different chassis. */
4225 ds_put_format(&match, " && is_chassis_resident(\"%s\")",
4226 nat->logical_port);
4227 } else {
4228 ds_put_format(&actions,
4229 "eth.src = %s; "
4230 "arp.sha = %s; ",
4231 op->lrp_networks.ea_s,
4232 op->lrp_networks.ea_s);
4233 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4234 * should only be sent from the "redirect-chassis", so that
4235 * upstream MAC learning points to the "redirect-chassis".
4236 * Also need to avoid generation of multiple ARP responses
4237 * from different chassis. */
4238 if (op->od->l3redirect_port) {
4239 ds_put_format(&match, " && is_chassis_resident(%s)",
4240 op->od->l3redirect_port->json_key);
4241 }
4242 }
4243 } else {
4244 ds_put_format(&actions,
4245 "eth.src = %s; "
4246 "arp.sha = %s; ",
4247 op->lrp_networks.ea_s,
4248 op->lrp_networks.ea_s);
4249 }
4250 ds_put_format(&actions,
de297547
GS
4251 "arp.tpa = arp.spa; "
4252 "arp.spa = "IP_FMT"; "
4253 "outport = %s; "
bf143492 4254 "flags.loopback = 1; "
de297547 4255 "output;",
de297547
GS
4256 IP_ARGS(ip),
4257 op->json_key);
4258 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 4259 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
4260 }
4261
4685e523
JP
4262 ds_clear(&match);
4263 ds_put_cstr(&match, "ip4.dst == {");
4264 bool has_drop_ips = false;
4265 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
49da9ec0 4266 bool snat_ip_is_router_ip = false;
dde5ea7b
GS
4267 for (int j = 0; j < n_snat_ips; j++) {
4268 /* Packets to SNAT IPs should not be dropped. */
4269 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
49da9ec0
CSV
4270 snat_ip_is_router_ip = true;
4271 break;
4685e523 4272 }
4ef48e9d 4273 }
49da9ec0
CSV
4274 if (snat_ip_is_router_ip) {
4275 continue;
4276 }
4685e523
JP
4277 ds_put_format(&match, "%s, ",
4278 op->lrp_networks.ipv4_addrs[i].addr_s);
4279 has_drop_ips = true;
4ef48e9d 4280 }
4685e523
JP
4281 ds_chomp(&match, ' ');
4282 ds_chomp(&match, ',');
4283 ds_put_cstr(&match, "}");
4ef48e9d 4284
4685e523
JP
4285 if (has_drop_ips) {
4286 /* Drop IP traffic to this router. */
09b39248
JP
4287 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
4288 ds_cstr(&match), "drop;");
4ef48e9d 4289 }
4685e523 4290
dde5ea7b 4291 free(snat_ips);
9975d7be
BP
4292 }
4293
6fdb7cd6
JP
4294 /* Logical router ingress table 1: IP Input for IPv6. */
4295 HMAP_FOR_EACH (op, key_node, ports) {
4296 if (!op->nbrp) {
4297 continue;
4298 }
4299
41a15b71
MS
4300 if (op->derived) {
4301 /* No ingress packets are accepted on a chassisredirect
4302 * port, so no need to program flows for that port. */
4303 continue;
4304 }
4305
6fdb7cd6
JP
4306 if (op->lrp_networks.n_ipv6_addrs) {
4307 /* L3 admission control: drop packets that originate from an
4308 * IPv6 address owned by the router (priority 100). */
4309 ds_clear(&match);
4310 ds_put_cstr(&match, "ip6.src == ");
4311 op_put_v6_networks(&match, op);
4312 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
4313 ds_cstr(&match), "drop;");
4314
4315 /* ICMPv6 echo reply. These flows reply to echo requests
4316 * received for the router's IP address. */
4317 ds_clear(&match);
4318 ds_put_cstr(&match, "ip6.dst == ");
4319 op_put_v6_networks(&match, op);
4320 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
4321
4322 ds_clear(&actions);
4323 ds_put_cstr(&actions,
4324 "ip6.dst <-> ip6.src; "
4325 "ip.ttl = 255; "
4326 "icmp6.type = 129; "
bf143492 4327 "flags.loopback = 1; "
6fdb7cd6
JP
4328 "next; ");
4329 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4330 ds_cstr(&match), ds_cstr(&actions));
4331
4332 /* Drop IPv6 traffic to this router. */
4333 ds_clear(&match);
4334 ds_put_cstr(&match, "ip6.dst == ");
4335 op_put_v6_networks(&match, op);
4336 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
4337 ds_cstr(&match), "drop;");
4338 }
4339
4340 /* ND reply. These flows reply to ND solicitations for the
4341 * router's own IP address. */
4342 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4343 ds_clear(&match);
4344 ds_put_format(&match,
4345 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
4346 "&& nd.target == %s",
4347 op->json_key,
4348 op->lrp_networks.ipv6_addrs[i].addr_s,
4349 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
4350 op->lrp_networks.ipv6_addrs[i].addr_s);
41a15b71
MS
4351 if (op->od->l3dgw_port && op == op->od->l3dgw_port
4352 && op->od->l3redirect_port) {
4353 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4354 * should only be sent from the "redirect-chassis", so that
4355 * upstream MAC learning points to the "redirect-chassis".
4356 * Also need to avoid generation of multiple ND replies
4357 * from different chassis. */
4358 ds_put_format(&match, " && is_chassis_resident(%s)",
4359 op->od->l3redirect_port->json_key);
4360 }
6fdb7cd6
JP
4361
4362 ds_clear(&actions);
4363 ds_put_format(&actions,
c34a87b6 4364 "put_nd(inport, ip6.src, nd.sll); "
6fdb7cd6
JP
4365 "nd_na { "
4366 "eth.src = %s; "
4367 "ip6.src = %s; "
4368 "nd.target = %s; "
4369 "nd.tll = %s; "
4370 "outport = inport; "
bf143492 4371 "flags.loopback = 1; "
6fdb7cd6
JP
4372 "output; "
4373 "};",
4374 op->lrp_networks.ea_s,
4375 op->lrp_networks.ipv6_addrs[i].addr_s,
4376 op->lrp_networks.ipv6_addrs[i].addr_s,
4377 op->lrp_networks.ea_s);
4378 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
4379 ds_cstr(&match), ds_cstr(&actions));
4380 }
4381 }
4382
06a26dd2 4383 /* NAT, Defrag and load balancing. */
de297547
GS
4384 HMAP_FOR_EACH (od, key_node, datapaths) {
4385 if (!od->nbr) {
4386 continue;
4387 }
4388
4389 /* Packets are allowed by default. */
cc4583aa 4390 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
de297547
GS
4391 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
4392 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
4393 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
06a26dd2
MS
4394 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
4395 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
de297547 4396
06a26dd2
MS
4397 /* NAT rules are only valid on Gateway routers and routers with
4398 * l3dgw_port (router has a port with "redirect-chassis"
4399 * specified). */
4400 if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
de297547
GS
4401 continue;
4402 }
4403
65d8810c
GS
4404 ovs_be32 snat_ip;
4405 const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat",
4406 &snat_ip);
4407 const char *lb_force_snat_ip = get_force_snat_ip(od, "lb",
4408 &snat_ip);
4409
de297547
GS
4410 for (int i = 0; i < od->nbr->n_nat; i++) {
4411 const struct nbrec_nat *nat;
4412
4413 nat = od->nbr->nat[i];
4414
4415 ovs_be32 ip, mask;
4416
4417 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
4418 if (error || mask != OVS_BE32_MAX) {
4419 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
4420 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
4421 nat->external_ip);
4422 free(error);
4423 continue;
4424 }
4425
4426 /* Check the validity of nat->logical_ip. 'logical_ip' can
4427 * be a subnet when the type is "snat". */
4428 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
4429 if (!strcmp(nat->type, "snat")) {
4430 if (error) {
4431 static struct vlog_rate_limit rl =
4432 VLOG_RATE_LIMIT_INIT(5, 1);
4433 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
4434 "in router "UUID_FMT"",
4435 nat->logical_ip, UUID_ARGS(&od->key));
4436 free(error);
4437 continue;
4438 }
4439 } else {
4440 if (error || mask != OVS_BE32_MAX) {
4441 static struct vlog_rate_limit rl =
4442 VLOG_RATE_LIMIT_INIT(5, 1);
4443 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
4444 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
4445 free(error);
4446 continue;
4447 }
4448 }
4449
06a26dd2
MS
4450 /* For distributed router NAT, determine whether this NAT rule
4451 * satisfies the conditions for distributed NAT processing. */
4452 bool distributed = false;
4453 struct eth_addr mac;
4454 if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
4455 nat->logical_port && nat->external_mac) {
4456 if (eth_addr_from_string(nat->external_mac, &mac)) {
4457 distributed = true;
4458 } else {
4459 static struct vlog_rate_limit rl =
4460 VLOG_RATE_LIMIT_INIT(5, 1);
4461 VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
4462 ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
4463 continue;
4464 }
4465 }
4466
de297547
GS
4467 /* Ingress UNSNAT table: It is for already established connections'
4468 * reverse traffic. i.e., SNAT has already been done in egress
4469 * pipeline and now the packet has entered the ingress pipeline as
4470 * part of a reply. We undo the SNAT here.
4471 *
4472 * Undoing SNAT has to happen before DNAT processing. This is
4473 * because when the packet was DNATed in ingress pipeline, it did
4474 * not know about the possibility of eventual additional SNAT in
4475 * egress pipeline. */
4476 if (!strcmp(nat->type, "snat")
4477 || !strcmp(nat->type, "dnat_and_snat")) {
06a26dd2
MS
4478 if (!od->l3dgw_port) {
4479 /* Gateway router. */
4480 ds_clear(&match);
4481 ds_put_format(&match, "ip && ip4.dst == %s",
4482 nat->external_ip);
4483 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90,
4484 ds_cstr(&match), "ct_snat; next;");
4485 } else {
4486 /* Distributed router. */
4487
4488 /* Traffic received on l3dgw_port is subject to NAT. */
4489 ds_clear(&match);
4490 ds_put_format(&match, "ip && ip4.dst == %s"
4491 " && inport == %s",
4492 nat->external_ip,
4493 od->l3dgw_port->json_key);
4494 if (!distributed && od->l3redirect_port) {
4495 /* Flows for NAT rules that are centralized are only
4496 * programmed on the "redirect-chassis". */
4497 ds_put_format(&match, " && is_chassis_resident(%s)",
4498 od->l3redirect_port->json_key);
4499 }
4500 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
4501 ds_cstr(&match), "ct_snat;");
4502
4503 /* Traffic received on other router ports must be
4504 * redirected to the central instance of the l3dgw_port
4505 * for NAT processing. */
4506 ds_clear(&match);
4507 ds_put_format(&match, "ip && ip4.dst == %s",
4508 nat->external_ip);
4509 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50,
4510 ds_cstr(&match),
4511 REGBIT_NAT_REDIRECT" = 1; next;");
4512 }
de297547
GS
4513 }
4514
4515 /* Ingress DNAT table: Packets enter the pipeline with destination
4516 * IP address that needs to be DNATted from a external IP address
4517 * to a logical IP address. */
4518 if (!strcmp(nat->type, "dnat")
4519 || !strcmp(nat->type, "dnat_and_snat")) {
06a26dd2
MS
4520 if (!od->l3dgw_port) {
4521 /* Gateway router. */
4522 /* Packet when it goes from the initiator to destination.
4523 * We need to set flags.loopback because the router can
4524 * send the packet back through the same interface. */
4525 ds_clear(&match);
4526 ds_put_format(&match, "ip && ip4.dst == %s",
4527 nat->external_ip);
4528 ds_clear(&actions);
4529 if (dnat_force_snat_ip) {
4530 /* Indicate to the future tables that a DNAT has taken
4531 * place and a force SNAT needs to be done in the
4532 * Egress SNAT table. */
4533 ds_put_format(&actions,
4534 "flags.force_snat_for_dnat = 1; ");
4535 }
4536 ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);",
4537 nat->logical_ip);
4538 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
4539 ds_cstr(&match), ds_cstr(&actions));
4540 } else {
4541 /* Distributed router. */
4542
4543 /* Traffic received on l3dgw_port is subject to NAT. */
4544 ds_clear(&match);
4545 ds_put_format(&match, "ip && ip4.dst == %s"
4546 " && inport == %s",
4547 nat->external_ip,
4548 od->l3dgw_port->json_key);
4549 if (!distributed && od->l3redirect_port) {
4550 /* Flows for NAT rules that are centralized are only
4551 * programmed on the "redirect-chassis". */
4552 ds_put_format(&match, " && is_chassis_resident(%s)",
4553 od->l3redirect_port->json_key);
4554 }
4555 ds_clear(&actions);
4556 ds_put_format(&actions, "ct_dnat(%s);",
4557 nat->logical_ip);
4558 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
4559 ds_cstr(&match), ds_cstr(&actions));
4560
4561 /* Traffic received on other router ports must be
4562 * redirected to the central instance of the l3dgw_port
4563 * for NAT processing. */
4564 ds_clear(&match);
4565 ds_put_format(&match, "ip && ip4.dst == %s",
4566 nat->external_ip);
4567 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
4568 ds_cstr(&match),
4569 REGBIT_NAT_REDIRECT" = 1; next;");
4570 }
4571 }
4572
4573 /* Egress UNDNAT table: It is for already established connections'
4574 * reverse traffic. i.e., DNAT has already been done in ingress
4575 * pipeline and now the packet has entered the egress pipeline as
4576 * part of a reply. We undo the DNAT here.
4577 *
4578 * Note that this only applies for NAT on a distributed router.
4579 * Undo DNAT on a gateway router is done in the ingress DNAT
4580 * pipeline stage. */
4581 if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
4582 || !strcmp(nat->type, "dnat_and_snat"))) {
09b39248 4583 ds_clear(&match);
06a26dd2
MS
4584 ds_put_format(&match, "ip && ip4.src == %s"
4585 " && outport == %s",
4586 nat->logical_ip,
4587 od->l3dgw_port->json_key);
4588 if (!distributed && od->l3redirect_port) {
4589 /* Flows for NAT rules that are centralized are only
4590 * programmed on the "redirect-chassis". */
4591 ds_put_format(&match, " && is_chassis_resident(%s)",
4592 od->l3redirect_port->json_key);
4593 }
09b39248 4594 ds_clear(&actions);
06a26dd2
MS
4595 if (distributed) {
4596 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
4597 ETH_ADDR_ARGS(mac));
65d8810c 4598 }
06a26dd2
MS
4599 ds_put_format(&actions, "ct_dnat;");
4600 ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
09b39248 4601 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
4602 }
4603
4604 /* Egress SNAT table: Packets enter the egress pipeline with
4605 * source ip address that needs to be SNATted to a external ip
4606 * address. */
4607 if (!strcmp(nat->type, "snat")
4608 || !strcmp(nat->type, "dnat_and_snat")) {
06a26dd2
MS
4609 if (!od->l3dgw_port) {
4610 /* Gateway router. */
4611 ds_clear(&match);
4612 ds_put_format(&match, "ip && ip4.src == %s",
4613 nat->logical_ip);
4614 ds_clear(&actions);
4615 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
4616
4617 /* The priority here is calculated such that the
4618 * nat->logical_ip with the longest mask gets a higher
4619 * priority. */
4620 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
4621 count_1bits(ntohl(mask)) + 1,
4622 ds_cstr(&match), ds_cstr(&actions));
4623 } else {
4624 /* Distributed router. */
4625 ds_clear(&match);
4626 ds_put_format(&match, "ip && ip4.src == %s"
4627 " && outport == %s",
4628 nat->logical_ip,
4629 od->l3dgw_port->json_key);
4630 if (!distributed && od->l3redirect_port) {
4631 /* Flows for NAT rules that are centralized are only
4632 * programmed on the "redirect-chassis". */
4633 ds_put_format(&match, " && is_chassis_resident(%s)",
4634 od->l3redirect_port->json_key);
4635 }
4636 ds_clear(&actions);
4637 if (distributed) {
4638 ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
4639 ETH_ADDR_ARGS(mac));
4640 }
4641 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
4642
4643 /* The priority here is calculated such that the
4644 * nat->logical_ip with the longest mask gets a higher
4645 * priority. */
4646 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
4647 count_1bits(ntohl(mask)) + 1,
4648 ds_cstr(&match), ds_cstr(&actions));
4649 }
4650 }
4651
4652 /* Logical router ingress table 0:
4653 * For NAT on a distributed router, add rules allowing
4654 * ingress traffic with eth.dst matching nat->external_mac
4655 * on the l3dgw_port instance where nat->logical_port is
4656 * resident. */
4657 if (distributed) {
09b39248 4658 ds_clear(&match);
06a26dd2
MS
4659 ds_put_format(&match,
4660 "eth.dst == "ETH_ADDR_FMT" && inport == %s"
4661 " && is_chassis_resident(\"%s\")",
4662 ETH_ADDR_ARGS(mac),
4663 od->l3dgw_port->json_key,
4664 nat->logical_port);
4665 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50,
4666 ds_cstr(&match), "next;");
4667 }
4668
4669 /* Ingress Gateway Redirect Table: For NAT on a distributed
4670 * router, add flows that are specific to a NAT rule. These
4671 * flows indicate the presence of an applicable NAT rule that
4672 * can be applied in a distributed manner. */
4673 if (distributed) {
4674 ds_clear(&match);
4675 ds_put_format(&match, "ip4.src == %s && outport == %s",
4676 nat->logical_ip,
4677 od->l3dgw_port->json_key);
4678 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100,
4679 ds_cstr(&match), "next;");
4680 }
de297547 4681
06a26dd2
MS
4682 /* Egress Loopback table: For NAT on a distributed router.
4683 * If packets in the egress pipeline on the distributed
4684 * gateway port have ip.dst matching a NAT external IP, then
4685 * loop a clone of the packet back to the beginning of the
4686 * ingress pipeline with inport = outport. */
4687 if (od->l3dgw_port) {
4688 /* Distributed router. */
4689 ds_clear(&match);
4690 ds_put_format(&match, "ip4.dst == %s && outport == %s",
4691 nat->external_ip,
4692 od->l3dgw_port->json_key);
4693 ds_clear(&actions);
4694 ds_put_format(&actions,
4695 "clone { ct_clear; "
4696 "inport = outport; outport = \"\"; "
4697 "flags = 0; flags.loopback = 1; ");
4698 for (int i = 0; i < MFF_N_LOG_REGS; i++) {
4699 ds_put_format(&actions, "reg%d = 0; ", i);
4700 }
4701 ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
4702 "next(pipeline=ingress, table=0); };");
4703 ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
09b39248 4704 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
4705 }
4706 }
4707
65d8810c 4708 /* Handle force SNAT options set in the gateway router. */
06a26dd2 4709 if (dnat_force_snat_ip && !od->l3dgw_port) {
65d8810c
GS
4710 /* If a packet with destination IP address as that of the
4711 * gateway router (as set in options:dnat_force_snat_ip) is seen,
4712 * UNSNAT it. */
4713 ds_clear(&match);
4714 ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip);
4715 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110,
4716 ds_cstr(&match), "ct_snat; next;");
4717
4718 /* Higher priority rules to force SNAT with the IP addresses
4719 * configured in the Gateway router. This only takes effect
4720 * when the packet has already been DNATed once. */
4721 ds_clear(&match);
4722 ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip");
4723 ds_clear(&actions);
4724 ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip);
4725 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
4726 ds_cstr(&match), ds_cstr(&actions));
4727 }
06a26dd2 4728 if (lb_force_snat_ip && !od->l3dgw_port) {
65d8810c
GS
4729 /* If a packet with destination IP address as that of the
4730 * gateway router (as set in options:lb_force_snat_ip) is seen,
4731 * UNSNAT it. */
4732 ds_clear(&match);
4733 ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip);
4734 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
4735 ds_cstr(&match), "ct_snat; next;");
4736
4737 /* Load balanced traffic will have flags.force_snat_for_lb set.
4738 * Force SNAT it. */
4739 ds_clear(&match);
4740 ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip");
4741 ds_clear(&actions);
4742 ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip);
4743 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100,
4744 ds_cstr(&match), ds_cstr(&actions));
4745 }
4746
06a26dd2
MS
4747 if (!od->l3dgw_port) {
4748 /* For gateway router, re-circulate every packet through
4749 * the DNAT zone. This helps with two things.
4750 *
4751 * 1. Any packet that needs to be unDNATed in the reverse
4752 * direction gets unDNATed. Ideally this could be done in
4753 * the egress pipeline. But since the gateway router
4754 * does not have any feature that depends on the source
4755 * ip address being external IP address for IP routing,
4756 * we can do it here, saving a future re-circulation.
4757 *
4758 * 2. Any packet that was sent through SNAT zone in the
4759 * previous table automatically gets re-circulated to get
4760 * back the new destination IP address that is needed for
4761 * routing in the openflow pipeline. */
4762 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
4763 "ip", "flags.loopback = 1; ct_dnat;");
4764 } else {
4765 /* For NAT on a distributed router, add flows to Ingress
4766 * IP Routing table, Ingress ARP Resolution table, and
4767 * Ingress Gateway Redirect Table that are not specific to a
4768 * NAT rule. */
4769
4770 /* The highest priority IN_IP_ROUTING rule matches packets
4771 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
4772 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
4773 * will take care of setting the outport. */
4774 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
4775 REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;");
4776
4777 /* The highest priority IN_ARP_RESOLVE rule matches packets
4778 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
4779 * then sets eth.dst to the distributed gateway port's
4780 * ethernet address. */
4781 ds_clear(&actions);
4782 ds_put_format(&actions, "eth.dst = %s; next;",
4783 od->l3dgw_port->lrp_networks.ea_s);
4784 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200,
4785 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
4786
4787 /* The highest priority IN_GW_REDIRECT rule redirects packets
4788 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
4789 * the central instance of the l3dgw_port for NAT processing. */
4790 ds_clear(&actions);
4791 ds_put_format(&actions, "outport = %s; next;",
4792 od->l3redirect_port->json_key);
4793 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200,
4794 REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions));
4795 }
4796
4797 /* Load balancing and packet defrag are only valid on
4798 * Gateway routers. */
4799 if (!smap_get(&od->nbr->options, "chassis")) {
4800 continue;
4801 }
8697d426
MS
4802
4803 /* A set to hold all ips that need defragmentation and tracking. */
4804 struct sset all_ips = SSET_INITIALIZER(&all_ips);
4805
4806 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
4807 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
4808 struct smap *vips = &lb->vips;
4809 struct smap_node *node;
4810
4811 SMAP_FOR_EACH (node, vips) {
4812 uint16_t port = 0;
4813
4814 /* node->key contains IP:port or just IP. */
4815 char *ip_address = NULL;
4816 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
4817 if (!ip_address) {
4818 continue;
4819 }
4820
4821 if (!sset_contains(&all_ips, ip_address)) {
4822 sset_add(&all_ips, ip_address);
4823 }
4824
4825 /* Higher priority rules are added for load-balancing in DNAT
4826 * table. For every match (on a VIP[:port]), we add two flows
4827 * via add_router_lb_flow(). One flow is for specific matching
4828 * on ct.new with an action of "ct_lb($targets);". The other
4829 * flow is for ct.est with an action of "ct_dnat;". */
4830 ds_clear(&actions);
4831 ds_put_format(&actions, "ct_lb(%s);", node->value);
4832
4833 ds_clear(&match);
4834 ds_put_format(&match, "ip && ip4.dst == %s",
4835 ip_address);
4836 free(ip_address);
4837
4838 if (port) {
4839 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
4840 ds_put_format(&match, " && udp && udp.dst == %d",
4841 port);
4842 } else {
4843 ds_put_format(&match, " && tcp && tcp.dst == %d",
4844 port);
4845 }
4846 add_router_lb_flow(lflows, od, &match, &actions, 120,
4847 lb_force_snat_ip);
4848 } else {
4849 add_router_lb_flow(lflows, od, &match, &actions, 110,
4850 lb_force_snat_ip);
4851 }
4852 }
4853 }
4854
4855 /* If there are any load balancing rules, we should send the
4856 * packet to conntrack for defragmentation and tracking. This helps
4857 * with two things.
4858 *
4859 * 1. With tracking, we can send only new connections to pick a
4860 * DNAT ip address from a group.
4861 * 2. If there are L4 ports in load balancing rules, we need the
4862 * defragmentation to match on L4 ports. */
4863 const char *ip_address;
4864 SSET_FOR_EACH(ip_address, &all_ips) {
4865 ds_clear(&match);
4866 ds_put_format(&match, "ip && ip4.dst == %s", ip_address);
4867 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
4868 100, ds_cstr(&match), "ct_next;");
4869 }
4870
4871 sset_destroy(&all_ips);
de297547
GS
4872 }
4873
4f6d33f3 4874 /* Logical router ingress table 5: IP Routing.
9975d7be
BP
4875 *
4876 * A packet that arrives at this table is an IP packet that should be
6fdb7cd6
JP
4877 * routed to the address in 'ip[46].dst'. This table sets outport to
4878 * the correct output port, eth.src to the output port's MAC
4879 * address, and '[xx]reg0' to the next-hop IP address (leaving
4880 * 'ip[46].dst', the packet’s final destination, unchanged), and
4881 * advances to the next table for ARP/ND resolution. */
9975d7be 4882 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4883 if (!op->nbrp) {
9975d7be
BP
4884 continue;
4885 }
4886
4685e523
JP
4887 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4888 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
4889 op->lrp_networks.ipv4_addrs[i].network_s,
440a9f4b 4890 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
4685e523 4891 }
6fdb7cd6
JP
4892
4893 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4894 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
4895 op->lrp_networks.ipv6_addrs[i].network_s,
440a9f4b 4896 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6fdb7cd6 4897 }
9975d7be 4898 }
4685e523 4899
6fdb7cd6 4900 /* Convert the static routes to flows. */
9975d7be
BP
4901 HMAP_FOR_EACH (od, key_node, datapaths) {
4902 if (!od->nbr) {
4903 continue;
4904 }
4905
28dc3fe9
SR
4906 for (int i = 0; i < od->nbr->n_static_routes; i++) {
4907 const struct nbrec_logical_router_static_route *route;
4908
4909 route = od->nbr->static_routes[i];
4910 build_static_route_flow(lflows, od, ports, route);
4911 }
9975d7be 4912 }
6fdb7cd6 4913
9975d7be
BP
4914 /* XXX destination unreachable */
4915
4f6d33f3 4916 /* Local router ingress table 6: ARP Resolution.
9975d7be
BP
4917 *
4918 * Any packet that reaches this table is an IP packet whose next-hop IP
4919 * address is in reg0. (ip4.dst is the final destination.) This table
4920 * resolves the IP address in reg0 into an output port in outport and an
4921 * Ethernet address in eth.dst. */
4922 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4923 if (op->nbrp) {
6fdb7cd6
JP
4924 /* This is a logical router port. If next-hop IP address in
4925 * '[xx]reg0' matches IP address of this router port, then
4926 * the packet is intended to eventually be sent to this
4927 * logical port. Set the destination mac address using this
4928 * port's mac address.
509afdc3
GS
4929 *
4930 * The packet is still in peer's logical pipeline. So the match
4931 * should be on peer's outport. */
6fdb7cd6
JP
4932 if (op->peer && op->nbrp->peer) {
4933 if (op->lrp_networks.n_ipv4_addrs) {
4934 ds_clear(&match);
4935 ds_put_format(&match, "outport == %s && reg0 == ",
4936 op->peer->json_key);
4937 op_put_v4_networks(&match, op, false);
4938
4939 ds_clear(&actions);
4940 ds_put_format(&actions, "eth.dst = %s; next;",
4941 op->lrp_networks.ea_s);
4942 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4943 100, ds_cstr(&match), ds_cstr(&actions));
4944 }
4685e523 4945
6fdb7cd6
JP
4946 if (op->lrp_networks.n_ipv6_addrs) {
4947 ds_clear(&match);
4948 ds_put_format(&match, "outport == %s && xxreg0 == ",
4949 op->peer->json_key);
4950 op_put_v6_networks(&match, op);
4951
4952 ds_clear(&actions);
4953 ds_put_format(&actions, "eth.dst = %s; next;",
4954 op->lrp_networks.ea_s);
4955 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4956 100, ds_cstr(&match), ds_cstr(&actions));
4957 }
509afdc3 4958 }
0ee00741 4959 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
4960 /* This is a logical switch port that backs a VM or a container.
4961 * Extract its addresses. For each of the address, go through all
4962 * the router ports attached to the switch (to which this port
4963 * connects) and if the address in question is reachable from the
6fdb7cd6 4964 * router port, add an ARP/ND entry in that router's pipeline. */
75cf9d2b 4965
e93b43d6 4966 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 4967 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 4968 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 4969 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 4970 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
4971 /* Get the Logical_Router_Port that the
4972 * Logical_Switch_Port is connected to, as
4973 * 'peer'. */
86e98048 4974 const char *peer_name = smap_get(
0ee00741 4975 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
4976 "router-port");
4977 if (!peer_name) {
4978 continue;
4979 }
4980
e93b43d6 4981 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 4982 if (!peer || !peer->nbrp) {
86e98048
BP
4983 continue;
4984 }
4985
4685e523 4986 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
4987 continue;
4988 }
4989
09b39248 4990 ds_clear(&match);
e93b43d6 4991 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
4992 peer->json_key, ip_s);
4993
09b39248 4994 ds_clear(&actions);
4685e523 4995 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 4996 ovn_lflow_add(lflows, peer->od,
09b39248
JP
4997 S_ROUTER_IN_ARP_RESOLVE, 100,
4998 ds_cstr(&match), ds_cstr(&actions));
86e98048 4999 }
9975d7be 5000 }
6fdb7cd6
JP
5001
5002 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
5003 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
5004 for (size_t k = 0; k < op->od->n_router_ports; k++) {
5005 /* Get the Logical_Router_Port that the
5006 * Logical_Switch_Port is connected to, as
5007 * 'peer'. */
5008 const char *peer_name = smap_get(
5009 &op->od->router_ports[k]->nbsp->options,
5010 "router-port");
5011 if (!peer_name) {
5012 continue;
5013 }
5014
5015 struct ovn_port *peer = ovn_port_find(ports, peer_name);
5016 if (!peer || !peer->nbrp) {
5017 continue;
5018 }
5019
5020 if (!find_lrp_member_ip(peer, ip_s)) {
5021 continue;
5022 }
5023
5024 ds_clear(&match);
5025 ds_put_format(&match, "outport == %s && xxreg0 == %s",
5026 peer->json_key, ip_s);
5027
5028 ds_clear(&actions);
5029 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
5030 ovn_lflow_add(lflows, peer->od,
5031 S_ROUTER_IN_ARP_RESOLVE, 100,
5032 ds_cstr(&match), ds_cstr(&actions));
5033 }
5034 }
9975d7be 5035 }
0ee00741 5036 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
5037 /* This is a logical switch port that connects to a router. */
5038
5039 /* The peer of this switch port is the router port for which
5040 * we need to add logical flows such that it can resolve
5041 * ARP entries for all the other router ports connected to
5042 * the switch in question. */
5043
0ee00741 5044 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
5045 "router-port");
5046 if (!peer_name) {
5047 continue;
5048 }
5049
5050 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 5051 if (!peer || !peer->nbrp) {
75cf9d2b
GS
5052 continue;
5053 }
5054
4685e523 5055 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 5056 const char *router_port_name = smap_get(
0ee00741 5057 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
5058 "router-port");
5059 struct ovn_port *router_port = ovn_port_find(ports,
5060 router_port_name);
0ee00741 5061 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
5062 continue;
5063 }
5064
5065 /* Skip the router port under consideration. */
5066 if (router_port == peer) {
5067 continue;
5068 }
5069
6fdb7cd6
JP
5070 if (router_port->lrp_networks.n_ipv4_addrs) {
5071 ds_clear(&match);
5072 ds_put_format(&match, "outport == %s && reg0 == ",
5073 peer->json_key);
5074 op_put_v4_networks(&match, router_port, false);
5075
5076 ds_clear(&actions);
5077 ds_put_format(&actions, "eth.dst = %s; next;",
5078 router_port->lrp_networks.ea_s);
5079 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5080 100, ds_cstr(&match), ds_cstr(&actions));
5081 }
4685e523 5082
6fdb7cd6
JP
5083 if (router_port->lrp_networks.n_ipv6_addrs) {
5084 ds_clear(&match);
5085 ds_put_format(&match, "outport == %s && xxreg0 == ",
5086 peer->json_key);
5087 op_put_v6_networks(&match, router_port);
5088
5089 ds_clear(&actions);
5090 ds_put_format(&actions, "eth.dst = %s; next;",
5091 router_port->lrp_networks.ea_s);
5092 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
5093 100, ds_cstr(&match), ds_cstr(&actions));
5094 }
75cf9d2b 5095 }
9975d7be
BP
5096 }
5097 }
75cf9d2b 5098
0bac7164
BP
5099 HMAP_FOR_EACH (od, key_node, datapaths) {
5100 if (!od->nbr) {
5101 continue;
5102 }
5103
c34a87b6
JP
5104 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
5105 "get_arp(outport, reg0); next;");
5106
5107 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
5108 "get_nd(outport, xxreg0); next;");
0bac7164
BP
5109 }
5110
41a15b71
MS
5111 /* Logical router ingress table 7: Gateway redirect.
5112 *
5113 * For traffic with outport equal to the l3dgw_port
5114 * on a distributed router, this table redirects a subset
5115 * of the traffic to the l3redirect_port which represents
5116 * the central instance of the l3dgw_port.
5117 */
5118 HMAP_FOR_EACH (od, key_node, datapaths) {
5119 if (!od->nbr) {
5120 continue;
5121 }
5122 if (od->l3dgw_port && od->l3redirect_port) {
5123 /* For traffic with outport == l3dgw_port, if the
5124 * packet did not match any higher priority redirect
5125 * rule, then the traffic is redirected to the central
5126 * instance of the l3dgw_port. */
5127 ds_clear(&match);
5128 ds_put_format(&match, "outport == %s",
5129 od->l3dgw_port->json_key);
5130 ds_clear(&actions);
5131 ds_put_format(&actions, "outport = %s; next;",
5132 od->l3redirect_port->json_key);
5133 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
5134 ds_cstr(&match), ds_cstr(&actions));
5135
5136 /* If the Ethernet destination has not been resolved,
5137 * redirect to the central instance of the l3dgw_port.
5138 * Such traffic will be replaced by an ARP request or ND
5139 * Neighbor Solicitation in the ARP request ingress
5140 * table, before being redirected to the central instance.
5141 */
5142 ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00");
5143 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150,
5144 ds_cstr(&match), ds_cstr(&actions));
5145 }
5146
5147 /* Packets are allowed by default. */
5148 ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
5149 }
5150
5151 /* Local router ingress table 8: ARP request.
0bac7164
BP
5152 *
5153 * In the common case where the Ethernet destination has been resolved,
94300e09
JP
5154 * this table outputs the packet (priority 0). Otherwise, it composes
5155 * and sends an ARP request (priority 100). */
0bac7164
BP
5156 HMAP_FOR_EACH (od, key_node, datapaths) {
5157 if (!od->nbr) {
5158 continue;
5159 }
5160
5161 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
5162 "eth.dst == 00:00:00:00:00:00",
5163 "arp { "
5164 "eth.dst = ff:ff:ff:ff:ff:ff; "
5165 "arp.spa = reg1; "
47021598 5166 "arp.tpa = reg0; "
0bac7164
BP
5167 "arp.op = 1; " /* ARP request */
5168 "output; "
5169 "};");
5170 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
5171 }
9975d7be 5172
de297547 5173 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
5174 *
5175 * Priority 100 rules deliver packets to enabled logical ports. */
5176 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 5177 if (!op->nbrp) {
9975d7be
BP
5178 continue;
5179 }
5180
0ee00741 5181 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
5182 /* Drop packets to disabled logical ports (since logical flow
5183 * tables are default-drop). */
5184 continue;
5185 }
5186
41a15b71
MS
5187 if (op->derived) {
5188 /* No egress packets should be processed in the context of
5189 * a chassisredirect port. The chassisredirect port should
5190 * be replaced by the l3dgw port in the local output
5191 * pipeline stage before egress processing. */
5192 continue;
5193 }
5194
09b39248
JP
5195 ds_clear(&match);
5196 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 5197 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 5198 ds_cstr(&match), "output;");
9975d7be 5199 }
09b39248
JP
5200
5201 ds_destroy(&match);
5202 ds_destroy(&actions);
9975d7be
BP
5203}
5204
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database.
 *
 * Strategy: build the desired state in memory (the 'lflows' and 'mcgroups'
 * hmaps), then reconcile it against the rows currently in the southbound
 * database.  Rows that match desired state are kept (and removed from the
 * in-memory set); rows with no match are deleted; whatever remains in the
 * in-memory set afterwards is inserted as new rows.  This keeps the
 * southbound churn minimal: unchanged rows are never rewritten. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    /* Compute the full desired set of logical flows (and, for switches,
     * multicast groups) from the northbound contents. */
    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database.
     *
     * Pass 1: walk existing southbound rows.  A row whose datapath no
     * longer exists, or that has no identical counterpart in 'lflows'
     * (same stage, priority, match, and actions), is deleted.  A row that
     * does match consumes its in-memory counterpart, so pass 2 won't
     * re-insert it. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            /* Datapath was removed from the northbound DB; drop its flows. */
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        /* Reconstruct the ovn_stage key for this row from its datapath
         * type, pipeline name, and table id, so it can be looked up in the
         * in-memory set. */
        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            /* Row already up to date; remove from the desired set so only
             * genuinely new flows remain for insertion below. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }

    /* Pass 2: everything still left in 'lflows' is a new flow; insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Trim the source locator lflow->where, which looks something like
         * "ovn/northd/ovn-northd.c:1234", down to just the part following the
         * last slash, e.g. "ovn-northd.c:1234". */
        const char *slash = strrchr(lflow->where, '/');
#if _WIN32
        /* On Windows the path separator may be a backslash; prefer whichever
         * separator appears last in the string. */
        const char *backslash = strrchr(lflow->where, '\\');
        if (!slash || backslash > slash) {
            slash = backslash;
        }
#endif
        const char *where = slash ? slash + 1 : lflow->where;

        /* Record debugging aids (human-readable stage name and the source
         * location that generated this flow) in external_ids. */
        const struct smap ids = SMAP_CONST2(
            &ids,
            "stage-name", ovn_stage_to_str(lflow->stage),
            "source", where);
        sbrec_logical_flow_set_external_ids(sbflow, &ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database.
     * Same two-pass reconcile as for logical flows: match existing rows
     * (updating their port sets in place), delete stale rows, then insert
     * whatever desired groups remain. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        /* Groups are keyed by (name, tunnel_key) within a datapath. */
        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Existing row: refresh its ports, then drop it from the
             * desired set so it isn't re-inserted below. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
ea382567
RB
5306
5307/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
5308 * We always update OVN_Southbound to match the current data in
5309 * OVN_Northbound, so that the address sets used in Logical_Flows in
5310 * OVN_Southbound is checked against the proper set.*/
5311static void
5312sync_address_sets(struct northd_context *ctx)
5313{
5314 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
5315
5316 const struct sbrec_address_set *sb_address_set;
5317 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
5318 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
5319 }
5320
5321 const struct nbrec_address_set *nb_address_set;
5322 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
5323 sb_address_set = shash_find_and_delete(&sb_address_sets,
5324 nb_address_set->name);
5325 if (!sb_address_set) {
5326 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
5327 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
5328 }
5329
5330 sbrec_address_set_set_addresses(sb_address_set,
5331 /* "char **" is not compatible with "const char **" */
5332 (const char **) nb_address_set->addresses,
5333 nb_address_set->n_addresses);
5334 }
5335
5336 struct shash_node *node, *next;
5337 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
5338 sbrec_address_set_delete(node->data);
5339 shash_delete(&sb_address_sets, node);
5340 }
5341 shash_destroy(&sb_address_sets);
5342}
5868eb24 5343\f
4edcdcf4 5344static void
fa183acc 5345ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
4edcdcf4 5346{
b511690b 5347 if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
331e7aef
NS
5348 return;
5349 }
5868eb24
BP
5350 struct hmap datapaths, ports;
5351 build_datapaths(ctx, &datapaths);
5352 build_ports(ctx, &datapaths, &ports);
b511690b 5353 build_ipam(&datapaths, &ports);
5868eb24
BP
5354 build_lflows(ctx, &datapaths, &ports);
5355
ea382567
RB
5356 sync_address_sets(ctx);
5357
5868eb24
BP
5358 struct ovn_datapath *dp, *next_dp;
5359 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
5360 ovn_datapath_destroy(&datapaths, dp);
5361 }
5362 hmap_destroy(&datapaths);
5363
5364 struct ovn_port *port, *next_port;
5365 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
5366 ovn_port_destroy(&ports, port);
5367 }
5368 hmap_destroy(&ports);
fa183acc
BP
5369
5370 /* Copy nb_cfg from northbound to southbound database.
5371 *
5372 * Also set up to update sb_cfg once our southbound transaction commits. */
5373 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
14338f22
GS
5374 if (!nb) {
5375 nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
5376 }
fa183acc 5377 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
14338f22
GS
5378 if (!sb) {
5379 sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
fa183acc 5380 }
14338f22
GS
5381 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
5382 sb_loop->next_cfg = nb->nb_cfg;
8639f9be
ND
5383
5384 cleanup_macam(&macam);
ac0630a2
RB
5385}
5386
fa183acc
BP
5387/* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
5388 * this column is not empty, it means we need to set the corresponding logical
5389 * port as 'up' in the northbound DB. */
ac0630a2 5390static void
fa183acc 5391update_logical_port_status(struct northd_context *ctx)
ac0630a2 5392{
fc3113bc 5393 struct hmap lports_hmap;
5868eb24 5394 const struct sbrec_port_binding *sb;
0ee00741 5395 const struct nbrec_logical_switch_port *nbsp;
fc3113bc
RB
5396
5397 struct lport_hash_node {
5398 struct hmap_node node;
0ee00741 5399 const struct nbrec_logical_switch_port *nbsp;
4ec3d7c7 5400 } *hash_node;
f93818dd 5401
fc3113bc 5402 hmap_init(&lports_hmap);
f93818dd 5403
0ee00741 5404 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
fc3113bc 5405 hash_node = xzalloc(sizeof *hash_node);
0ee00741
HK
5406 hash_node->nbsp = nbsp;
5407 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
fc3113bc
RB
5408 }
5409
5868eb24 5410 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
0ee00741 5411 nbsp = NULL;
fc3113bc 5412 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
5413 hash_string(sb->logical_port, 0),
5414 &lports_hmap) {
0ee00741
HK
5415 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
5416 nbsp = hash_node->nbsp;
fc3113bc
RB
5417 break;
5418 }
f93818dd
RB
5419 }
5420
0ee00741 5421 if (!nbsp) {
dcda6e0d 5422 /* The logical port doesn't exist for this port binding. This can
2e2762d4 5423 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 5424 * around to pruning the Port_Binding yet. */
f93818dd
RB
5425 continue;
5426 }
5427
0ee00741 5428 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
f93818dd 5429 bool up = true;
0ee00741
HK
5430 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
5431 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
f93818dd 5432 bool up = false;
0ee00741 5433 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
f93818dd
RB
5434 }
5435 }
fc3113bc 5436
4ec3d7c7 5437 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
5438 free(hash_node);
5439 }
5440 hmap_destroy(&lports_hmap);
ac0630a2 5441}
45f98d4c 5442
281977f7
NS
/* DHCPv4 options that ovn-northd publishes to the southbound DHCP_Options
 * table.  Each entry expands to a struct dhcp_opts_map initializer whose
 * name/code/type fields are written out by
 * check_and_add_supported_dhcp_opts_to_sb_db().  The DHCP_OPT_* macros
 * come from ovn/lib/ovn-dhcp.h (included above). */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
5469
33ac3c83
NS
/* DHCPv6 options that ovn-northd publishes to the southbound
 * DHCPv6_Options table; consumed by
 * check_and_add_supported_dhcpv6_opts_to_sb_db().  The DHCPV6_OPT_*
 * macros come from ovn/lib/ovn-dhcp.h (included above). */
static struct dhcp_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
5476
281977f7
NS
5477static void
5478check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
5479{
5480 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
5481 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
5482 sizeof(supported_dhcp_opts[0])); i++) {
5483 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
5484 dhcp_opt_hash(supported_dhcp_opts[i].name));
5485 }
5486
5487 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
5488 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
5489 struct dhcp_opts_map *dhcp_opt =
5490 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
5491 if (dhcp_opt) {
5492 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
5493 } else {
5494 sbrec_dhcp_options_delete(opt_row);
5495 }
5496 }
5497
5498 struct dhcp_opts_map *opt;
5499 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
5500 struct sbrec_dhcp_options *sbrec_dhcp_option =
5501 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
5502 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
5503 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
5504 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
5505 }
5506
5507 hmap_destroy(&dhcp_opts_to_add);
5508}
5509
33ac3c83
NS
5510static void
5511check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
5512{
5513 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
5514 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
5515 sizeof(supported_dhcpv6_opts[0])); i++) {
5516 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
5517 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
5518 }
5519
5520 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
5521 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
5522 struct dhcp_opts_map *dhcp_opt =
5523 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
5524 if (dhcp_opt) {
5525 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
5526 } else {
5527 sbrec_dhcpv6_options_delete(opt_row);
5528 }
5529 }
5530
5531 struct dhcp_opts_map *opt;
5532 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
5533 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
5534 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
5535 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
5536 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
5537 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
5538 }
5539
5540 hmap_destroy(&dhcpv6_opts_to_add);
5541}
5542
fa183acc
BP
5543/* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
5544static void
5545update_northbound_cfg(struct northd_context *ctx,
5546 struct ovsdb_idl_loop *sb_loop)
5547{
5548 /* Update northbound sb_cfg if appropriate. */
5549 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
5550 int64_t sb_cfg = sb_loop->cur_cfg;
5551 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
5552 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
5553 }
5554
5555 /* Update northbound hv_cfg if appropriate. */
5556 if (nbg) {
5557 /* Find minimum nb_cfg among all chassis. */
5558 const struct sbrec_chassis *chassis;
5559 int64_t hv_cfg = nbg->nb_cfg;
5560 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
5561 if (chassis->nb_cfg < hv_cfg) {
5562 hv_cfg = chassis->nb_cfg;
5563 }
5564 }
5565
5566 /* Update hv_cfg. */
5567 if (nbg->hv_cfg != hv_cfg) {
5568 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
5569 }
5570 }
5571}
5572
5573/* Handle a fairly small set of changes in the southbound database. */
5574static void
5575ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
5576{
5577 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
5578 return;
5579 }
5580
5581 update_logical_port_status(ctx);
5582 update_northbound_cfg(ctx, sb_loop);
5583}
5584\f
ac0630a2
RB
/* Parses ovn-northd's command-line options, filling in the global
 * 'ovnsb_db' and 'ovnnb_db' connection strings (falling back to the
 * library defaults when not given).  The daemon, vlog, and SSL option
 * sets are pulled in via the DAEMON_*/VLOG_*/STREAM_SSL_* macros, which
 * expand to both the long-option table entries and the matching switch
 * cases below.  May exit() directly for --help/--options/--version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        /* Option-id enumerators for the macro-provided option groups. */
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    /* Derive the short-option string from the long-option table so the
     * two cannot drift apart. */
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Macro-expanded handlers for the daemon/vlog/SSL options. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the standard database locations when not specified. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
5654
5868eb24
BP
/* Adds 'column' to the set replicated by 'idl' and suppresses change
 * alerts for it — used below for southbound columns that ovn-northd
 * itself writes, so its own updates do not wake it up again. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
5662
ac0630a2
RB
/* ovn-northd entry point: daemonizes, wires up a unixctl "exit" command,
 * opens IDL loops on the northbound (all tables) and southbound (selected
 * tables/columns only) databases, then runs the translation loop until
 * asked to exit. */
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    /* 'exiting' is set from the unixctl thread of control via
     * ovn_northd_exit(). */
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db.
     * sb_cfg/hv_cfg are columns we write ourselves, so don't alert on
     * them. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db: each table
     * and column is registered explicitly below, mostly without alerts
     * since ovn-northd is their writer. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* 'chassis' is written by ovn-controller, so we do want alerts on it
     * (see update_logical_port_status()). */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    /* Chassis nb_cfg is written by ovn-controller; alerts wanted (see
     * update_northbound_cfg()). */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Make sure poll_block() below returns immediately so we can
             * fall out of the loop. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9
AW
5797
5798static void
5799ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
5800 const char *argv[] OVS_UNUSED, void *exiting_)
5801{
5802 bool *exiting = exiting_;
5803 *exiting = true;
5804
5805 unixctl_command_reply(conn, NULL);
5806}