]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
INSTALL: Recommend starting daemons with --log-file.
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
b511690b 21#include "bitmap.h"
ac0630a2 22#include "command-line.h"
67d9b930 23#include "daemon.h"
ac0630a2 24#include "dirs.h"
3e8a2ad1 25#include "openvswitch/dynamic-string.h"
ac0630a2 26#include "fatal-signal.h"
4edcdcf4 27#include "hash.h"
ee89ea7b
TW
28#include "openvswitch/hmap.h"
29#include "openvswitch/json.h"
8b2ed684 30#include "ovn/lex.h"
281977f7 31#include "ovn/lib/ovn-dhcp.h"
e3df8838
BP
32#include "ovn/lib/ovn-nb-idl.h"
33#include "ovn/lib/ovn-sb-idl.h"
218351dd 34#include "ovn/lib/ovn-util.h"
a6095f81 35#include "ovn/actions.h"
064d7f84 36#include "packets.h"
ac0630a2 37#include "poll-loop.h"
5868eb24 38#include "smap.h"
7a15be69 39#include "sset.h"
ac0630a2
RB
40#include "stream.h"
41#include "stream-ssl.h"
7b303ff9 42#include "unixctl.h"
ac0630a2 43#include "util.h"
4edcdcf4 44#include "uuid.h"
ac0630a2
RB
45#include "openvswitch/vlog.h"
46
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* unixctl command callback that asks the daemon to exit (defined later in
 * this file). */
static unixctl_cb_func ovn_northd_exit;

/* IDL connections to the OVN northbound and southbound databases, plus the
 * currently open transaction (if any) on each.  Passed around to most of
 * the processing functions in this file. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;
    struct ovsdb_idl *ovnsb_idl;
    struct ovsdb_idl_txn *ovnnb_txn;
    struct ovsdb_idl_txn *ovnsb_txn;
};

/* Locations of the OVN northbound and southbound databases. */
static const char *ovnnb_db;
static const char *ovnsb_db;

/* Every MAC address handed out by the IPAM module starts with this
 * 0a:00:00:00:00:00 prefix; MAC_ADDR_SPACE bounds the 24-bit suffix. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);

/* Limit on tags used for nested containers (see tag allocation below). */
#define MAX_OVN_TAGS 4096
880fcd14
BP
69\f
/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};

/* Returns an "enum ovn_stage" built from the arguments.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))

/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY. */
enum ovn_stage {
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")     \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")      \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")       \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful") \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")          \
    PIPELINE_STAGE(SWITCH, IN,  QOS_MARK,       7, "ls_in_qos_mark")     \
    PIPELINE_STAGE(SWITCH, IN,  LB,             8, "ls_in_lb")           \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,       9, "ls_in_stateful")     \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,    10, "ls_in_arp_rsp")      \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  11, "ls_in_dhcp_options") \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       13, "ls_in_l2_lkup")      \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")     \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, OUT, LB,           3, "ls_out_lb")            \
    PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")           \
    PIPELINE_STAGE(SWITCH, OUT, QOS_MARK,     5, "ls_out_qos_mark")      \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     6, "ls_out_stateful")      \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  7, "ls_out_port_sec_ip")    \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  8, "ls_out_port_sec_l2")    \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
    PIPELINE_STAGE(ROUTER, IN,  DEFRAG,      2, "lr_in_defrag")       \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,      3, "lr_in_unsnat")       \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,        4, "lr_in_dnat")         \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  5, "lr_in_ip_routing")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 6, "lr_in_arp_resolve")  \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 7, "lr_in_arp_request")  \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,      0, "lr_out_snat")          \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,  1, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};

/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* reg0 bits used to communicate state between pipeline stages (conntrack
 * defrag/commit/NAT decisions and the DHCP options lookup result). */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
880fcd14
BP
159/* Returns an "enum ovn_stage" built from the arguments. */
160static enum ovn_stage
161ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
162 uint8_t table)
163{
164 return OVN_STAGE_BUILD(dp_type, pipeline, table);
165}
166
167/* Returns the pipeline to which 'stage' belongs. */
168static enum ovn_pipeline
169ovn_stage_get_pipeline(enum ovn_stage stage)
170{
171 return (stage >> 8) & 1;
172}
173
174/* Returns the table to which 'stage' belongs. */
175static uint8_t
176ovn_stage_get_table(enum ovn_stage stage)
177{
178 return stage & 0xff;
179}
180
/* Returns a string name for 'stage' (e.g. "ls_in_acl"), or "<unknown>" for
 * a value that is not a defined stage. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
    /* Expand PIPELINE_STAGES into one "case" per stage returning its name. */
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}

/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added. */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
    /* Expand PIPELINE_STAGES into one "case" per stage returning DP_SWITCH
     * or DP_ROUTER as appropriate. */
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
    default: OVS_NOT_REACHED();
    }
}
880fcd14 207\f
ac0630a2
RB
/* Prints a usage/help message for ovn-northd to stdout, including the
 * common daemon, logging, and stream options. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
228\f
5868eb24
BP
/* One allocated tunnel ID.  Tunnel-ID sets are hmaps of these, hashed on
 * the ID itself. */
struct tnlid_node {
    struct hmap_node hmap_node;
    uint32_t tnlid;
};

/* Frees every tnlid_node in 'tnlids' and destroys the map itself. */
static void
destroy_tnlids(struct hmap *tnlids)
{
    struct tnlid_node *node;
    HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
        free(node);
    }
    hmap_destroy(tnlids);
}
243
244static void
245add_tnlid(struct hmap *set, uint32_t tnlid)
246{
247 struct tnlid_node *node = xmalloc(sizeof *node);
248 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
249 node->tnlid = tnlid;
4edcdcf4
RB
250}
251
4edcdcf4 252static bool
5868eb24 253tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 254{
5868eb24
BP
255 const struct tnlid_node *node;
256 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
257 if (node->tnlid == tnlid) {
258 return true;
259 }
260 }
261 return false;
262}
4edcdcf4 263
5868eb24
BP
264static uint32_t
265allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
266 uint32_t *hint)
267{
268 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
269 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
270 if (!tnlid_in_use(set, tnlid)) {
271 add_tnlid(set, tnlid);
272 *hint = tnlid;
273 return tnlid;
274 }
4edcdcf4
RB
275 }
276
5868eb24
BP
277 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
278 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
279 return 0;
280}
281\f
a6095f81
BS
/* One qdisc queue ID allocated on a particular chassis.  The set is an hmap
 * hashed on the chassis UUID, so all queues of one chassis share a bucket. */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;
    uint32_t queue_id;
    struct uuid chassis_uuid;
};

/* Frees every queue node in 'set' and destroys the map itself. */
static void
destroy_chassis_queues(struct hmap *set)
{
    struct ovn_chassis_qdisc_queues *node;
    HMAP_FOR_EACH_POP (node, key_node, set) {
        free(node);
    }
    hmap_destroy(set);
}
297
298static void
299add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
300 uint32_t queue_id)
301{
302 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
303 node->queue_id = queue_id;
304 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
305 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
306}
307
308static bool
309chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
310 uint32_t queue_id)
311{
312 const struct ovn_chassis_qdisc_queues *node;
313 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
314 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
315 && node->queue_id == queue_id) {
316 return true;
317 }
318 }
319 return false;
320}
321
/* Allocates and returns a queue ID for 'chassis' that is not yet in use in
 * 'set', or 0 (with a rate-limited warning) if the queue-ID space is
 * exhausted.
 *
 * NOTE(review): the scan starts at QDISC_MIN_QUEUE_ID + 1, so the minimum
 * queue ID itself is never handed out — confirm whether that is intended. */
static uint32_t
allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
{
    for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
         queue_id <= QDISC_MAX_QUEUE_ID;
         queue_id++) {
        if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
            add_chassis_queue(set, &chassis->header_.uuid, queue_id);
            return queue_id;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
    return 0;
}
338
339static void
340free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
341 uint32_t queue_id)
342{
343 struct ovn_chassis_qdisc_queues *node;
344 HMAP_FOR_EACH_WITH_HASH (node, key_node,
345 uuid_hash(&chassis->header_.uuid),
346 set) {
347 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
348 && node->queue_id == queue_id) {
349 hmap_remove(set, &node->key_node);
350 break;
351 }
352 }
353}
354
/* Returns true if 'opts' sets any QoS parameter ("qos_max_rate" or
 * "qos_burst"). */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    if (smap_get(opts, "qos_max_rate")) {
        return true;
    }
    return smap_get(opts, "qos_burst") != NULL;
}
361\f
9975d7be
BP
/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;  /* Ports that connect to router ports
                                      * (presumably populated in
                                      * build_ports() — confirm there). */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;     /* Search hint for port key allocation. */

    bool has_unknown;           /* NOTE(review): looks like it marks a switch
                                 * with an "unknown"-addressed port — set
                                 * outside this chunk; confirm. */

    /* IPAM data. */
    struct hmap ipam;           /* IPs allocated on this switch (ipam_node),
                                 * host byte order. */
};

/* One MAC address allocated by the IPAM module, stored in the global
 * 'macam' hmap hashed on the 64-bit form of the address. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};

/* Frees every node in the MAC allocation table 'macam' (but does not
 * destroy the map, which is a static global). */
static void
cleanup_macam(struct hmap *macam)
{
    struct macam_node *node;
    HMAP_FOR_EACH_POP (node, hmap_node, macam) {
        free(node);
    }
}

/* One IPv4 address allocated on a datapath, stored in od->ipam hashed on
 * the address. */
struct ipam_node {
    struct hmap_node hmap_node;
    uint32_t ip_addr;           /* Allocated IP address, host byte order. */
};

/* Frees every node in an IP allocation table and destroys the map itself. */
static void
destroy_ipam(struct hmap *ipam)
{
    struct ipam_node *node;
    HMAP_FOR_EACH_POP (node, hmap_node, ipam) {
        free(node);
    }
    hmap_destroy(ipam);
}
415
5868eb24
BP
/* Creates a new ovn_datapath for 'key', inserts it into 'datapaths', and
 * returns it.  Any of 'nbs', 'nbr', and 'sb' may be NULL; remaining fields
 * start zeroed. */
static struct ovn_datapath *
ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
                    const struct nbrec_logical_switch *nbs,
                    const struct nbrec_logical_router *nbr,
                    const struct sbrec_datapath_binding *sb)
{
    struct ovn_datapath *od = xzalloc(sizeof *od);
    od->key = *key;
    od->sb = sb;
    od->nbs = nbs;
    od->nbr = nbr;
    hmap_init(&od->port_tnlids);
    hmap_init(&od->ipam);
    od->port_key_hint = 0;
    hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
    return od;
}

/* Removes 'od' from 'datapaths' and frees it along with its owned data.
 * A no-op if 'od' is NULL. */
static void
ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
{
    if (od) {
        /* Don't remove od->list.  It is used within build_datapaths() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(datapaths, &od->key_node);
        destroy_tnlids(&od->port_tnlids);
        destroy_ipam(&od->ipam);
        free(od->router_ports);
        free(od);
    }
}
448
9a9961d2
BP
449/* Returns 'od''s datapath type. */
450static enum ovn_datapath_type
451ovn_datapath_get_type(const struct ovn_datapath *od)
452{
453 return od->nbs ? DP_SWITCH : DP_ROUTER;
454}
455
5868eb24
BP
456static struct ovn_datapath *
457ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
458{
459 struct ovn_datapath *od;
460
461 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
462 if (uuid_equals(uuid, &od->key)) {
463 return od;
464 }
465 }
466 return NULL;
467}
468
/* Returns the ovn_datapath that southbound row 'sb' refers to through its
 * external-ids:logical-switch or external-ids:logical-router UUID, or NULL
 * if 'sb' carries neither key or there is no matching datapath. */
static struct ovn_datapath *
ovn_datapath_from_sbrec(struct hmap *datapaths,
                        const struct sbrec_datapath_binding *sb)
{
    struct uuid key;

    if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
        !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
        return NULL;
    }
    return ovn_datapath_find(datapaths, &key);
}

/* Returns true unless the router's northbound 'enabled' column is present
 * and explicitly false (an absent column means enabled). */
static bool
lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
{
    return !lrouter->enabled || *lrouter->enabled;
}
487
5868eb24
BP
/* Correlates southbound Datapath_Binding rows with northbound logical
 * switches and routers, populating 'datapaths' with one ovn_datapath per
 * logical datapath and partitioning them into three lists:
 *
 *   - 'sb_only': southbound rows with no northbound counterpart (stale).
 *   - 'nb_only': northbound rows that need a southbound row created.
 *   - 'both': matched pairs.
 *
 * Southbound rows that lack the identifying external-ids key, or that
 * duplicate another row's key, are deleted outright. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every well-formed southbound row; start them all in
     * 'sb_only' and move them to 'both' as northbound matches appear. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: match northbound logical switches against the index. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }

    /* Pass 3: match northbound logical routers (disabled routers get no
     * datapath at all). */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and a router sharing one UUID. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
571
/* Allocates a datapath tunnel key from the 24-bit space [1, 2^24 - 1],
 * keeping a static hint so successive calls scan forward.  Returns 0 on
 * exhaustion. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
578
0bac7164
BP
579/* Updates the southbound Datapath_Binding table so that it contains the
580 * logical switches and routers specified by the northbound database.
581 *
582 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
583 * switch and router. */
5868eb24
BP
584static void
585build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
586{
587 struct ovs_list sb_only, nb_only, both;
588
589 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
590
417e7e66 591 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
592 /* First index the in-use datapath tunnel IDs. */
593 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
594 struct ovn_datapath *od;
595 LIST_FOR_EACH (od, list, &both) {
596 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
597 }
598
599 /* Add southbound record for each unmatched northbound record. */
600 LIST_FOR_EACH (od, list, &nb_only) {
601 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
602 if (!tunnel_key) {
603 break;
604 }
605
606 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
607
0f8e9c12
BP
608 /* Get the logical-switch or logical-router UUID to set in
609 * external-ids. */
5868eb24 610 char uuid_s[UUID_LEN + 1];
9975d7be
BP
611 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
612 const char *key = od->nbs ? "logical-switch" : "logical-router";
0f8e9c12
BP
613
614 /* Get name to set in external-ids. */
615 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
616
617 /* Set external-ids. */
618 struct smap ids = SMAP_INITIALIZER(&ids);
619 smap_add(&ids, key, uuid_s);
620 if (*name) {
621 smap_add(&ids, "name", name);
622 }
623 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
624 smap_destroy(&ids);
5868eb24
BP
625
626 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
627 }
628 destroy_tnlids(&dp_tnlids);
629 }
630
631 /* Delete southbound records without northbound matches. */
632 struct ovn_datapath *od, *next;
633 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 634 ovs_list_remove(&od->list);
5868eb24
BP
635 sbrec_datapath_binding_delete(od->sb);
636 ovn_datapath_destroy(datapaths, od);
637 }
638}
639\f
/* A logical switch port or logical router port, joined with its southbound
 * Port_Binding row. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath that owns this port. */

    struct ovs_list list;       /* In list of similar records. */
};
673
/* Creates a new ovn_port named 'key', inserts it into 'ports', and returns
 * it.  Any of 'nbsp', 'nbrp', and 'sb' may be NULL; other fields start
 * zeroed. */
static struct ovn_port *
ovn_port_create(struct hmap *ports, const char *key,
                const struct nbrec_logical_switch_port *nbsp,
                const struct nbrec_logical_router_port *nbrp,
                const struct sbrec_port_binding *sb)
{
    struct ovn_port *op = xzalloc(sizeof *op);

    /* Pre-compute the JSON-quoted form of the port name for use when
     * composing logical flow matches. */
    struct ds json_key = DS_EMPTY_INITIALIZER;
    json_string_escape(key, &json_key);
    op->json_key = ds_steal_cstr(&json_key);

    op->key = xstrdup(key);
    op->sb = sb;
    op->nbsp = nbsp;
    op->nbrp = nbrp;
    hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
    return op;
}
693
/* Removes 'port' from 'ports' and frees it along with all its owned
 * address data.  A no-op if 'port' is NULL. */
static void
ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
{
    if (port) {
        /* Don't remove port->list.  It is used within build_ports() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(ports, &port->key_node);

        for (int i = 0; i < port->n_lsp_addrs; i++) {
            destroy_lport_addresses(&port->lsp_addrs[i]);
        }
        free(port->lsp_addrs);

        for (int i = 0; i < port->n_ps_addrs; i++) {
            destroy_lport_addresses(&port->ps_addrs[i]);
        }
        free(port->ps_addrs);

        destroy_lport_addresses(&port->lrp_networks);
        free(port->json_key);
        free(port->key);
        free(port);
    }
}
719
720static struct ovn_port *
721ovn_port_find(struct hmap *ports, const char *name)
722{
723 struct ovn_port *op;
724
725 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
726 if (!strcmp(op->key, name)) {
727 return op;
728 }
729 }
730 return NULL;
731}
732
/* Allocates a tunnel key for a port on 'od' from the 15-bit space
 * [1, 2^15 - 1], using the datapath's own hint.  Returns 0 on exhaustion. */
static uint32_t
ovn_port_allocate_key(struct ovn_datapath *od)
{
    return allocate_tnlid(&od->port_tnlids, "port",
                          (1u << 15) - 1, &od->port_key_hint);
}
739
8639f9be
ND
/* Returns true if 'ea' (whose 64-bit form is 'mac64') is already recorded
 * in the global 'macam' table; logs a rate-limited warning if 'warn'. */
static bool
ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
{
    struct macam_node *macam_node;
    HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
                             &macam) {
        if (eth_addr_equals(*ea, macam_node->mac_addr)) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
                             ETH_ADDR_ARGS(macam_node->mac_addr));
            }
            return true;
        }
    }
    return false;
}

/* Returns true if 'ip' (host byte order) is already recorded in 'od''s
 * IPAM table; logs a rate-limited warning if 'warn'. */
static bool
ipam_is_duplicate_ip(struct ovn_datapath *od, uint32_t ip, bool warn)
{
    struct ipam_node *ipam_node;
    HMAP_FOR_EACH_WITH_HASH (ipam_node, hmap_node, hash_int(ip, 0),
                             &od->ipam) {
        if (ipam_node->ip_addr == ip) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(&rl, "Duplicate IP set: "IP_FMT,
                             IP_ARGS(htonl(ip)));
            }
            return true;
        }
    }
    return false;
}

/* Records '*ea' in the global macam.  Only addresses carrying
 * MAC_ADDR_PREFIX (i.e. handed out by this module) are tracked; when
 * 'check' is true, a duplicate is warned about and not inserted. */
static void
ipam_insert_mac(struct eth_addr *ea, bool check)
{
    if (!ea) {
        return;
    }

    uint64_t mac64 = eth_addr_to_uint64(*ea);
    /* If the new MAC was not assigned by this address management system or
     * check is true and the new MAC is a duplicate, do not insert it into the
     * macam hmap. */
    if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
        || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
        return;
    }

    struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
    new_macam_node->mac_addr = *ea;
    hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
}

/* Records 'ip' (host byte order) as allocated on 'od'.  When 'check' is
 * true, a duplicate is warned about and not inserted. */
static void
ipam_insert_ip(struct ovn_datapath *od, uint32_t ip, bool check)
{
    if (!od) {
        return;
    }

    if (check && ipam_is_duplicate_ip(od, ip, true)) {
        return;
    }

    struct ipam_node *new_ipam_node = xmalloc(sizeof *new_ipam_node);
    new_ipam_node->ip_addr = ip;
    hmap_insert(&od->ipam, &new_ipam_node->hmap_node, hash_int(ip, 0));
}
812
/* Registers the MAC and IPv4 addresses from one entry of a logical switch
 * port's "addresses" column with the MACAM/IPAM tables.  "unknown" and
 * dynamic entries are skipped; IPs are only tracked when the owning switch
 * has other_config:subnet set. */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || is_dynamic_lsp_address(address)) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!smap_get(&od->nbs->other_config, "subnet")) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip, true);
    }

    destroy_lport_addresses(&laddrs);
}

/* Registers all of 'op''s addresses with the MACAM/IPAM tables.  For a
 * logical router port, the port's IPs are charged against the peer
 * switch's datapath (only when that switch has a subnet configured). */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Router port IPs only matter to IPAM when the peer switch tracks
         * a subnet. */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip, true);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
884
885static uint64_t
886ipam_get_unused_mac(void)
887{
888 /* Stores the suffix of the most recently ipam-allocated MAC address. */
889 static uint32_t last_mac;
890
891 uint64_t mac64;
892 struct eth_addr mac;
893 uint32_t mac_addr_suffix, i;
894 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
895 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
896 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
897 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
898 eth_addr_from_uint64(mac64, &mac);
899 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
900 last_mac = mac_addr_suffix;
901 break;
902 }
903 }
904
905 if (i == MAC_ADDR_SPACE) {
906 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
907 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
908 mac64 = 0;
909 }
910
911 return mac64;
912}
913
/* Returns an unused IPv4 address (host byte order) within 'subnet'/'mask',
 * or 0 (with a rate-limited warning) if the subnet is exhausted.  The
 * network address, x.x.x.1 (reserved for a logical router port), and the
 * broadcast address are never returned. */
static uint32_t
ipam_get_unused_ip(struct ovn_datapath *od, uint32_t subnet, uint32_t mask)
{
    if (!od) {
        return 0;
    }

    uint32_t ip = 0;

    /* Find an unused IP address in subnet. x.x.x.1 is reserved for a
     * logical router port. */
    for (uint32_t i = 2; i < ~mask; i++) {
        uint32_t tentative_ip = subnet + i;
        if (!ipam_is_duplicate_ip(od, tentative_ip, false)) {
            ip = tentative_ip;
            break;
        }
    }

    if (!ip) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "Subnet address space has been exhausted.");
    }

    return ip;
}
940
/* Allocates a dynamic MAC/IP pair for logical switch port 'op' on 'od',
 * writing the result to the port's northbound dynamic_addresses column.
 * 'addrspec' is the "addresses" entry being satisfied: either plain
 * "dynamic" (both MAC and IP allocated) or "<mac> dynamic" (caller-chosen
 * MAC, IP allocated).  Returns true on success, false if either address
 * space is exhausted or the arguments are unusable. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec, ovs_be32 subnet, ovs_be32 mask)
{
    if (!od || !op || !op->nbsp) {
        return false;
    }

    uint32_t ip = ipam_get_unused_ip(od, ntohl(subnet), ntohl(mask));
    if (!ip) {
        return false;
    }

    struct eth_addr mac;
    bool check_mac;
    int n = 0;

    /* "<mac> dynamic": honor the user-supplied MAC but verify it is not a
     * duplicate when inserting; otherwise allocate a fresh MAC. */
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        check_mac = true;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        check_mac = false;
    }

    /* Add MAC/IP to MACAM/IPAM hmaps if both addresses were allocated
     * successfully. */
    ipam_insert_ip(od, ip, false);
    ipam_insert_mac(&mac, check_mac);

    char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT,
                               ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip)));
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr);
    free(new_addr);

    return true;
}
983
/* Walks every logical switch that has other_config:subnet set and assigns
 * dynamic MAC/IP addresses to ports whose "addresses" column requests
 * them. */
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management.  In non-virtualized
     * world, MAC addresses come with the hardware.  But, with virtualized
     * workloads, they need to be assigned and managed.  This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            const char *subnet_str = smap_get(&od->nbs->other_config,
                                              "subnet");
            if (!subnet_str) {
                continue;
            }

            ovs_be32 subnet, mask;
            char *error = ip_parse_masked(subnet_str, &subnet, &mask);
            /* Reject unparsable subnets and /32 masks (no room to
             * allocate). */
            if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
                static struct vlog_rate_limit rl
                    = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
                free(error);
                continue;
            }

            struct ovn_port *op;
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp =
                    od->nbs->ports[i];

                if (!nbsp) {
                    continue;
                }

                op = ovn_port_find(ports, nbsp->name);
                if (!op || (op->nbsp && op->peer)) {
                    /* Do not allocate addresses for logical switch ports that
                     * have a peer. */
                    continue;
                }

                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    /* Only the first "dynamic" entry is satisfied (note the
                     * break below), and only if no dynamic address has been
                     * recorded yet. */
                    if (is_dynamic_lsp_address(nbsp->addresses[j])
                        && !nbsp->dynamic_addresses) {
                        /* NOTE(review): this writes into
                         * op->lsp_addrs[op->n_lsp_addrs]; the array is
                         * sized outside this chunk (presumably in
                         * build_ports() with room to spare) — confirm the
                         * capacity there. */
                        if (!ipam_allocate_addresses(od, op,
                                         nbsp->addresses[j], subnet, mask)
                            || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                            static struct vlog_rate_limit rl
                                = VLOG_RATE_LIMIT_INIT(1, 1);
                            VLOG_INFO_RL(&rl, "Failed to allocate address.");
                        } else {
                            op->n_lsp_addrs++;
                        }
                        break;
                    }
                }
            }
        }
    }
}
1050\f
b511690b
GS
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node; /* In a hash map keyed by 'parent_name'. */
    char *parent_name;          /* Owned copy of the parent port name. */
    unsigned long *allocated_tags;  /* A bitmap to track allocated tags. */
};
1060
1061static void
1062tag_alloc_destroy(struct hmap *tag_alloc_table)
1063{
1064 struct tag_alloc_node *node;
1065 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1066 bitmap_free(node->allocated_tags);
1067 free(node->parent_name);
1068 free(node);
1069 }
1070 hmap_destroy(tag_alloc_table);
1071}
1072
1073static struct tag_alloc_node *
1074tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1075{
1076 /* If a node for the 'parent_name' exists, return it. */
1077 struct tag_alloc_node *tag_alloc_node;
1078 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1079 hash_string(parent_name, 0),
1080 tag_alloc_table) {
1081 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1082 return tag_alloc_node;
1083 }
1084 }
1085
1086 /* Create a new node. */
1087 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1088 tag_alloc_node->parent_name = xstrdup(parent_name);
1089 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1090 /* Tag 0 is invalid for nested containers. */
1091 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1092 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1093 hash_string(parent_name, 0));
1094
1095 return tag_alloc_node;
1096}
1097
1098static void
1099tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1100 const struct nbrec_logical_switch_port *nbsp)
1101{
1102 /* Add the tags of already existing nested containers. If there is no
1103 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1104 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1105 return;
1106 }
1107
1108 struct tag_alloc_node *tag_alloc_node;
1109 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1110 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1111}
1112
1113static void
1114tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
1115 const struct nbrec_logical_switch_port *nbsp)
1116{
1117 if (!nbsp->tag_request) {
1118 return;
1119 }
1120
1121 if (nbsp->parent_name && nbsp->parent_name[0]
1122 && *nbsp->tag_request == 0) {
1123 /* For nested containers that need allocation, do the allocation. */
1124
1125 if (nbsp->tag) {
1126 /* This has already been allocated. */
1127 return;
1128 }
1129
1130 struct tag_alloc_node *tag_alloc_node;
1131 int64_t tag;
1132 tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
1133 nbsp->parent_name);
1134 tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
1135 if (tag == MAX_OVN_TAGS) {
1136 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1137 VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
1138 "parent %s", nbsp->parent_name);
1139 return;
1140 }
1141 bitmap_set1(tag_alloc_node->allocated_tags, tag);
1142 nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
1143 } else if (*nbsp->tag_request != 0) {
1144 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1145 nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
1146 }
1147}
1148\f
8639f9be 1149
5868eb24
BP
/* Correlates southbound Port_Binding rows with northbound logical switch and
 * router ports.  Populates 'ports' with one "struct ovn_port" per logical
 * port and partitions them into three lists: 'sb_only' (stale SB rows with
 * no NB counterpart), 'nb_only' (NB ports with no SB row yet), and 'both'.
 * Also records existing qdisc queue ids in 'chassis_qdisc_queues' and
 * existing container tags in 'tag_alloc_table', and wires up peer pointers
 * between router ports and switch ports of type "router". */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start by assuming every SB binding is stale; NB matches below move
     * entries from 'sb_only' to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: join its switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Remember a queue id already assigned on a chassis so
                     * the allocator does not reuse it. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues,
                            &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse the addresses column; "unknown" entries and dynamic
                 * entries without an allocation yet are skipped. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            if (!extract_lsp_addresses(
                                        nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the port_security column the same way. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: join its router ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A router port without any network is useless; skip it. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            /* Link both directions and register this switch port on its
             * datapath's list of router attachments. */
            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;
        } else if (op->nbrp && op->nbrp->peer) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1346
/* Synchronizes the southbound Port_Binding row 'op->sb' with the northbound
 * contents of 'op' (either a logical router port via op->nbrp, or a logical
 * switch port via op->nbsp).  'chassis_qdisc_queues' tracks per-chassis
 * qdisc queue ids for ports with QoS options. */
static void
ovn_port_update_sbrec(const struct ovn_port *op,
                      struct hmap *chassis_qdisc_queues)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "l3gateway". */
        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
        if (chassis) {
            sbrec_port_binding_set_type(op->sb, "l3gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        const char *peer = op->peer ? op->peer->key : "<error>";
        struct smap new;
        smap_init(&new);
        smap_add(&new, "peer", peer);
        if (chassis) {
            smap_add(&new, "l3gateway-chassis", chassis);
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Parent/tag/mac are switch-port concepts; clear them for router
         * ports. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary (non-"router") switch port: manage its qdisc queue
             * id based on whether QoS parameters are configured. */
            uint32_t queue_id = smap_get_int(
                &op->sb->options, "qdisc_queue_id", 0);
            bool has_qos = port_has_qos_params(&op->nbsp->options);
            struct smap options;

            if (op->sb->chassis && has_qos && !queue_id) {
                queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
                                                    op->sb->chassis);
            } else if (!has_qos && queue_id) {
                free_chassis_queueid(chassis_qdisc_queues,
                                     op->sb->chassis,
                                     queue_id);
                queue_id = 0;
            }

            smap_clone(&options, &op->nbsp->options);
            if (queue_id) {
                smap_add_format(&options,
                                "qdisc_queue_id", "%d", queue_id);
            }
            sbrec_port_binding_set_options(op->sb, &options);
            smap_destroy(&options);
            sbrec_port_binding_set_type(op->sb, op->nbsp->type);
        } else {
            /* Switch port of type "router": mirror the peer router's
             * gateway chassis, if any. */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "l3gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "l3gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get_def(&op->nbsp->options,
                                                   "router-port", "<error>");
            struct smap new;
            smap_init(&new);
            smap_add(&new, "peer", router_port);
            if (chassis) {
                smap_add(&new, "l3gateway-chassis", chassis);
            }

            /* Propagate nat-addresses only when they parse cleanly. */
            const char *nat_addresses = smap_get(&op->nbsp->options,
                                                 "nat-addresses");
            if (nat_addresses) {
                struct lport_addresses laddrs;
                if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(1, 1);
                    VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
                } else {
                    smap_add(&new, "nat-addresses", nat_addresses);
                    destroy_lport_addresses(&laddrs);
                }
            }
            sbrec_port_binding_set_options(op->sb, &new);
            smap_destroy(&new);
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);
    }
}
1445
6e31816f
CSV
1446/* Remove mac_binding entries that refer to logical_ports which are
1447 * deleted. */
1448static void
1449cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
1450{
1451 const struct sbrec_mac_binding *b, *n;
1452 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
1453 if (!ovn_port_find(ports, b->logical_port)) {
1454 sbrec_mac_binding_delete(b);
1455 }
1456 }
1457}
1458
/* Updates the southbound Port_Binding table so that it contains the logical
 * switch ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx, struct hmap *datapaths,
            struct hmap *ports)
{
    struct ovs_list sb_only, nb_only, both;
    struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
    struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);

    join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
                       &tag_alloc_table, &sb_only, &nb_only, &both);

    struct ovn_port *op, *next;
    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data.  Also index the in-use tunnel_keys.
     * For logical ports that are in NB database, do any tag allocation
     * needed. */
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        if (op->nbsp) {
            tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
        }
        ovn_port_update_sbrec(op, &chassis_qdisc_queues);

        /* Track allocated tunnel keys (and the highest seen, as a hint
         * for future allocations). */
        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record. */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            /* Key space exhausted for this datapath; skip the port. */
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(op, &chassis_qdisc_queues);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* Remember whether any stale SB rows existed before they are destroyed
     * below; MAC_Binding cleanup is only needed in that case. */
    bool remove_mac_bindings = false;
    if (!ovs_list_is_empty(&sb_only)) {
        remove_mac_bindings = true;
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        ovs_list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
    if (remove_mac_bindings) {
        cleanup_mac_bindings(ctx, ports);
    }

    tag_alloc_destroy(&tag_alloc_table);
    destroy_chassis_queues(&chassis_qdisc_queues);
}
1525\f
/* Multicast group tunnel keys occupy the top of the 16-bit key space;
 * fixed well-known groups are assigned downward from OVN_MAX_MULTICAST. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
1539
1540static bool
1541multicast_group_equal(const struct multicast_group *a,
1542 const struct multicast_group *b)
1543{
1544 return !strcmp(a->name, b->name) && a->key == b->key;
1545}
1546
/* Multicast group entry. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Member ports; grown on demand by ovn_multicast_add(). */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
1556
1557static uint32_t
1558ovn_multicast_hash(const struct ovn_datapath *datapath,
1559 const struct multicast_group *group)
1560{
1561 return hash_pointer(datapath, group->key);
1562}
1563
1564static struct ovn_multicast *
1565ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
1566 const struct multicast_group *group)
1567{
1568 struct ovn_multicast *mc;
1569
1570 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
1571 ovn_multicast_hash(datapath, group), mcgroups) {
1572 if (mc->datapath == datapath
1573 && multicast_group_equal(mc->group, group)) {
1574 return mc;
4edcdcf4
RB
1575 }
1576 }
5868eb24
BP
1577 return NULL;
1578}
1579
1580static void
1581ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
1582 struct ovn_port *port)
1583{
1584 struct ovn_datapath *od = port->od;
1585 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
1586 if (!mc) {
1587 mc = xmalloc(sizeof *mc);
1588 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
1589 mc->datapath = od;
1590 mc->group = group;
1591 mc->n_ports = 0;
1592 mc->allocated_ports = 4;
1593 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
1594 }
1595 if (mc->n_ports >= mc->allocated_ports) {
1596 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
1597 sizeof *mc->ports);
1598 }
1599 mc->ports[mc->n_ports++] = port;
1600}
4edcdcf4 1601
5868eb24
BP
1602static void
1603ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
1604{
1605 if (mc) {
1606 hmap_remove(mcgroups, &mc->hmap_node);
1607 free(mc->ports);
1608 free(mc);
1609 }
1610}
4edcdcf4 1611
5868eb24
BP
1612static void
1613ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
1614 const struct sbrec_multicast_group *sb)
1615{
1616 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
1617 for (size_t i = 0; i < mc->n_ports; i++) {
1618 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
1619 }
1620 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
1621 free(ports);
4edcdcf4 1622}
bd39395f 1623\f
/* Logical flow generation.
 *
 * This code generates the Logical_Flow table in the southbound database, as a
 * function of most of the northbound database.
 */

struct ovn_lflow {
    struct hmap_node hmap_node; /* In a flow table hashed by ovn_lflow_hash(). */

    struct ovn_datapath *od;    /* Datapath the flow belongs to. */
    enum ovn_stage stage;       /* Pipeline stage (encodes table + direction). */
    uint16_t priority;
    char *match;                /* Match expression. */
    char *actions;              /* Action string. */
    const char *where;          /* Source locator of the code that added the
                                 * flow (see ovn_lflow_add()). */
};
1640
1641static size_t
5868eb24 1642ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 1643{
5868eb24 1644 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 1645 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
1646 hash = hash_string(lflow->match, hash);
1647 return hash_string(lflow->actions, hash);
bd39395f
BP
1648}
1649
5868eb24
BP
1650static bool
1651ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1652{
1653 return (a->od == b->od
880fcd14 1654 && a->stage == b->stage
5868eb24
BP
1655 && a->priority == b->priority
1656 && !strcmp(a->match, b->match)
1657 && !strcmp(a->actions, b->actions));
1658}
1659
/* Fills in 'lflow'.  The 'match' and 'actions' pointers are stored directly,
 * not copied; whether they are owned depends on the caller
 * (ovn_lflow_add_at() passes xstrdup()'d copies that ovn_lflow_destroy()
 * frees, while ovn_lflow_find() passes borrowed strings for a stack-local
 * lookup key).  'where' is a source-locator string and is never freed. */
static void
ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
               enum ovn_stage stage, uint16_t priority,
               char *match, char *actions, const char *where)
{
    lflow->od = od;
    lflow->stage = stage;
    lflow->priority = priority;
    lflow->match = match;
    lflow->actions = actions;
    lflow->where = where;
}
1672
48605550 1673/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 1674static void
d8026bbf
BP
1675ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od,
1676 enum ovn_stage stage, uint16_t priority,
1677 const char *match, const char *actions, const char *where)
5868eb24 1678{
9a9961d2
BP
1679 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
1680
5868eb24 1681 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 1682 ovn_lflow_init(lflow, od, stage, priority,
d8026bbf 1683 xstrdup(match), xstrdup(actions), where);
5868eb24
BP
1684 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
1685}
1686
/* Adds a row with the specified contents to the Logical_Flow table.
 * Convenience wrapper around ovn_lflow_add_at() that records the caller's
 * source location (file:line) for debugging. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     OVS_SOURCE_LOCATOR)
1691
5868eb24
BP
1692static struct ovn_lflow *
1693ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 1694 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1695 const char *match, const char *actions)
1696{
1697 struct ovn_lflow target;
880fcd14 1698 ovn_lflow_init(&target, od, stage, priority,
d8026bbf
BP
1699 CONST_CAST(char *, match), CONST_CAST(char *, actions),
1700 NULL);
5868eb24
BP
1701
1702 struct ovn_lflow *lflow;
1703 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
1704 lflows) {
1705 if (ovn_lflow_equal(lflow, &target)) {
1706 return lflow;
bd39395f
BP
1707 }
1708 }
5868eb24
BP
1709 return NULL;
1710}
bd39395f 1711
5868eb24
BP
1712static void
1713ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1714{
1715 if (lflow) {
1716 hmap_remove(lflows, &lflow->hmap_node);
1717 free(lflow->match);
1718 free(lflow->actions);
1719 free(lflow);
1720 }
bd39395f
BP
1721}
1722
bd39395f 1723/* Appends port security constraints on L2 address field 'eth_addr_field'
e93b43d6
JP
1724 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
1725 * elements, is the collection of port_security constraints from an
1726 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
bd39395f 1727static void
685f4dfe 1728build_port_security_l2(const char *eth_addr_field,
e93b43d6
JP
1729 struct lport_addresses *ps_addrs,
1730 unsigned int n_ps_addrs,
685f4dfe 1731 struct ds *match)
bd39395f 1732{
e93b43d6
JP
1733 if (!n_ps_addrs) {
1734 return;
1735 }
bd39395f 1736
e93b43d6 1737 ds_put_format(match, " && %s == {", eth_addr_field);
f7cb14cd 1738
e93b43d6
JP
1739 for (size_t i = 0; i < n_ps_addrs; i++) {
1740 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
bd39395f 1741 }
f7cb14cd 1742 ds_chomp(match, ' ');
bd39395f 1743 ds_put_cstr(match, "}");
bd39395f
BP
1744}
1745
685f4dfe
NS
1746static void
1747build_port_security_ipv6_nd_flow(
1748 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1749 int n_ipv6_addrs)
1750{
1751 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1752 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1753 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1754 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1755 ETH_ADDR_ARGS(ea));
1756 if (!n_ipv6_addrs) {
1757 ds_put_cstr(match, "))");
1758 return;
1759 }
1760
1761 char ip6_str[INET6_ADDRSTRLEN + 1];
1762 struct in6_addr lla;
1763 in6_generate_lla(ea, &lla);
1764 memset(ip6_str, 0, sizeof(ip6_str));
1765 ipv6_string_mapped(ip6_str, &lla);
1766 ds_put_format(match, " && (nd.target == %s", ip6_str);
1767
1768 for(int i = 0; i < n_ipv6_addrs; i++) {
1769 memset(ip6_str, 0, sizeof(ip6_str));
1770 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1771 ds_put_format(match, " || nd.target == %s", ip6_str);
1772 }
1773
1774 ds_put_format(match, ")))");
1775}
1776
1777static void
1778build_port_security_ipv6_flow(
1779 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1780 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1781{
1782 char ip6_str[INET6_ADDRSTRLEN + 1];
1783
1784 ds_put_format(match, " && %s == {",
1785 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1786
1787 /* Allow link-local address. */
1788 struct in6_addr lla;
1789 in6_generate_lla(ea, &lla);
1790 ipv6_string_mapped(ip6_str, &lla);
1791 ds_put_format(match, "%s, ", ip6_str);
1792
9e687b23
DL
1793 /* Allow ip6.dst=ff00::/8 for multicast packets */
1794 if (pipeline == P_OUT) {
1795 ds_put_cstr(match, "ff00::/8, ");
1796 }
685f4dfe
NS
1797 for(int i = 0; i < n_ipv6_addrs; i++) {
1798 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
9e687b23 1799 ds_put_format(match, "%s, ", ip6_str);
685f4dfe 1800 }
9e687b23
DL
1801 /* Replace ", " by "}". */
1802 ds_chomp(match, ' ');
1803 ds_chomp(match, ',');
685f4dfe
NS
1804 ds_put_cstr(match, "}");
1805}
1806
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    /* One dynamic string is reused (ds_clear) across all generated flows. */
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A MAC-only entry constrains neither protocol family, so it gets
         * both the ARP and the ND allow flows. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Drop the trailing ", " and close the set. */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port not matched above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
1886
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has IPv4 addresses,
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *
 *   - If the port security has IPv6 addresses,
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both
 *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* The match field and stage differ by pipeline direction; everything
     * else below is symmetric. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A port-security entry with a MAC but no L3 addresses places no
         * IP constraints at all; skip it. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                /* Start an "ip4.src == {...}" set; the loop below fills in
                 * the allowed addresses. */
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress: the port may always receive broadcast and
                 * multicast in addition to its own addresses. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address.  If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            /* The IPv6 address list itself is rendered by the helper. */
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Priority-80 catch-all: any other IP traffic with this port's
         * MAC is dropped. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }

}
2013
95a9a275 2014static bool
80f408f4 2015lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 2016{
80f408f4 2017 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
2018}
2019
4c7bf534 2020static bool
80f408f4 2021lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 2022{
80f408f4 2023 return !lsp->up || *lsp->up;
4c7bf534
NS
2024}
2025
/* Builds the DHCPv4 options action and the response-rewriting action for
 * logical port 'op' when it is offered address 'offer_ip'.
 *
 * On success, appends a "put_dhcp_opts(...)" action to 'options_action'
 * and a packet-rewrite action (swap MACs, set IPs/UDP ports, loop the
 * packet back out the ingress port) to 'response_action', and returns
 * true.
 *
 * Returns false when native DHCPv4 is disabled for the port, the
 * configured cidr is invalid, 'offer_ip' lies outside that cidr, or any
 * of the required options ("server_id", "server_mac", "lease_time",
 * "router") is missing. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
       /* Either
        *  - cidr defined is invalid or
        *  - the offer ip of the logical port doesn't belong to the cidr
        *    defined in the DHCPv4 options.
        *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");
    const char *router = smap_get(
        &op->nbsp->dhcpv4_options->options, "router");

    if (!(server_ip && server_mac && lease_time && router)) {
        /* "server_id", "server_mac", "lease_time" and "router" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    /* Work on a private copy of the options so we can adjust it. */
    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));
    struct smap_node *node;
    SMAP_FOR_EACH(node, &dhcpv4_options) {
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }

    /* Drop the trailing ", " left by the loop above, then close the
     * put_dhcp_opts() call. */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    /* Turn the request into a reply: send it back to the requester with
     * the server's MAC/IP and standard DHCP server/client UDP ports. */
    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    smap_destroy(&dhcpv4_options);
    return true;
}
2096
33ac3c83
NS
2097static bool
2098build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2099 struct ds *options_action, struct ds *response_action)
2100{
2101 if (!op->nbsp->dhcpv6_options) {
2102 /* CMS has disabled native DHCPv6 for this lport. */
2103 return false;
2104 }
2105
2106 struct in6_addr host_ip, mask;
2107
2108 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2109 &mask);
2110 if (error) {
2111 free(error);
2112 return false;
2113 }
2114 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2115 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2116 if (!ipv6_mask_is_any(&ip6_mask)) {
2117 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2118 * options.*/
2119 return false;
2120 }
2121
2122 /* "server_id" should be the MAC address. */
2123 const char *server_mac = smap_get(&op->nbsp->dhcpv6_options->options,
2124 "server_id");
2125 struct eth_addr ea;
2126 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2127 /* "server_id" should be present in the dhcpv6_options. */
2128 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2129 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2130 " for lport %s", op->json_key);
2131 return false;
2132 }
2133
2134 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2135 struct in6_addr lla;
2136 in6_generate_lla(ea, &lla);
2137
2138 char server_ip[INET6_ADDRSTRLEN + 1];
2139 ipv6_string_mapped(server_ip, &lla);
2140
2141 char ia_addr[INET6_ADDRSTRLEN + 1];
2142 ipv6_string_mapped(ia_addr, offer_ip);
2143
2144 ds_put_format(options_action,
40df4566
ZKL
2145 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2146
2147 /* Check whether the dhcpv6 options should be configured as stateful.
2148 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2149 if (!smap_get_bool(&op->nbsp->dhcpv6_options->options,
2150 "dhcpv6_stateless", false)) {
2151 char ia_addr[INET6_ADDRSTRLEN + 1];
2152 ipv6_string_mapped(ia_addr, offer_ip);
2153
2154 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2155 }
2156
33ac3c83
NS
2157 struct smap_node *node;
2158 SMAP_FOR_EACH (node, &op->nbsp->dhcpv6_options->options) {
40df4566
ZKL
2159 if (strcmp(node->key, "dhcpv6_stateless")) {
2160 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2161 }
33ac3c83
NS
2162 }
2163 ds_chomp(options_action, ' ');
2164 ds_chomp(options_action, ',');
2165 ds_put_cstr(options_action, "); next;");
2166
2167 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2168 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2169 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2170 "output;",
2171 server_mac, server_ip);
40df4566 2172
33ac3c83
NS
2173 return true;
2174}
2175
78aab811
JP
2176static bool
2177has_stateful_acl(struct ovn_datapath *od)
2178{
9975d7be
BP
2179 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2180 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
2181 if (!strcmp(acl->action, "allow-related")) {
2182 return true;
2183 }
2184 }
2185
2186 return false;
2187}
2188
/* Builds the ingress and egress pre-ACL stages (S_SWITCH_IN_PRE_ACL /
 * S_SWITCH_OUT_PRE_ACL) for datapath 'od', adding the resulting logical
 * flows to 'lflows'.  When the datapath has stateful ACLs, IP traffic is
 * flagged for conntrack defragmentation, except on router ports and for
 * ND packets. */
static void
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    if (has_stateful) {
        for (size_t i = 0; i < od->n_router_ports; i++) {
            struct ovn_port *op = od->router_ports[i];
            /* Can't use ct() for router ports. Consider the
             * following configuration: lp1(10.0.0.2) on
             * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
             * ping from lp1 to lp2, First, the response will go
             * through ct() with a zone for lp2 in the ls2 ingress
             * pipeline on hostB.  That ct zone knows about this
             * connection.  Next, it goes through ct() with the zone
             * for the router port in the egress pipeline of ls2 on
             * hostB.  This zone does not know about the connection,
             * as the icmp request went through the logical router
             * on hostA, not hostB.  This would only work with
             * distributed conntrack state across all chassis. */
            struct ds match_in = DS_EMPTY_INITIALIZER;
            struct ds match_out = DS_EMPTY_INITIALIZER;

            /* Priority 110: bypass conntrack for traffic to/from router
             * ports (higher than the priority-100 defrag flows below). */
            ds_put_format(&match_in, "ip && inport == %s", op->json_key);
            ds_put_format(&match_out, "ip && outport == %s", op->json_key);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                          ds_cstr(&match_in), "next;");
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                          ds_cstr(&match_out), "next;");

            ds_destroy(&match_in);
            ds_destroy(&match_out);
        }
        /* Ingress and Egress Pre-ACL Table (Priority 110).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed.
         *
         * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * it to conntrack for tracking and defragmentation. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
    }
}
78aab811 2250
7a15be69
GS
2251/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2252 * 'ip_address'. The caller must free() the memory allocated for
2253 * 'ip_address'. */
2254static void
2255ip_address_and_port_from_lb_key(const char *key, char **ip_address,
2256 uint16_t *port)
2257{
2258 char *ip_str, *start, *next;
2259 *ip_address = NULL;
2260 *port = 0;
2261
2262 next = start = xstrdup(key);
2263 ip_str = strsep(&next, ":");
2264 if (!ip_str || !ip_str[0]) {
2265 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2266 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2267 free(start);
2268 return;
2269 }
2270
2271 ovs_be32 ip, mask;
2272 char *error = ip_parse_masked(ip_str, &ip, &mask);
2273 if (error || mask != OVS_BE32_MAX) {
2274 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2275 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2276 free(start);
2277 free(error);
2278 return;
2279 }
2280
2281 int l4_port = 0;
2282 if (next && next[0]) {
2283 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
2284 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2285 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
2286 free(start);
2287 return;
2288 }
2289 }
2290
2291 *port = l4_port;
2292 *ip_address = strdup(ip_str);
2293 free(start);
2294}
2295
2296static void
2297build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2298{
2299 /* Allow all packets to go to next tables by default. */
2300 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2301 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2302
2303 struct sset all_ips = SSET_INITIALIZER(&all_ips);
61591ad9
GS
2304 bool vip_configured = false;
2305 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2306 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
2307 struct smap *vips = &lb->vips;
2308 struct smap_node *node;
7a15be69
GS
2309
2310 SMAP_FOR_EACH (node, vips) {
2311 vip_configured = true;
2312
2313 /* node->key contains IP:port or just IP. */
2314 char *ip_address = NULL;
2315 uint16_t port;
2316 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2317 if (!ip_address) {
2318 continue;
2319 }
2320
2321 if (!sset_contains(&all_ips, ip_address)) {
2322 sset_add(&all_ips, ip_address);
2323 }
2324
2325 free(ip_address);
2326
2327 /* Ignore L4 port information in the key because fragmented packets
2328 * may not have L4 information. The pre-stateful table will send
2329 * the packet through ct() action to de-fragment. In stateful
2330 * table, we will eventually look at L4 information. */
2331 }
61591ad9 2332 }
7a15be69 2333
61591ad9
GS
2334 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2335 * packet to conntrack for defragmentation. */
2336 const char *ip_address;
2337 SSET_FOR_EACH(ip_address, &all_ips) {
2338 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
2339 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
2340 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2341 free(match);
2342 }
7a15be69 2343
61591ad9 2344 sset_destroy(&all_ips);
7a15be69 2345
61591ad9
GS
2346 if (vip_configured) {
2347 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
2348 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
7a15be69
GS
2349 }
2350}
2351
facf8652
GS
2352static void
2353build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
2354{
2355 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
2356 * allowed by default. */
2357 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
2358 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
2359
2360 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
2361 * sent to conntrack for tracking and defragmentation. */
2362 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
2363 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2364 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
2365 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2366}
2367
/* Builds the ingress and egress ACL stages (S_SWITCH_IN_ACL /
 * S_SWITCH_OUT_ACL) for datapath 'od':
 *
 *   - baseline allow-all at priority 0;
 *   - when any stateful ACL exists, conntrack commit/allow/drop plumbing
 *     at priorities 1 and 65535 (ct_label bit 0, "blocked", marks
 *     connections whose policy has been revoked);
 *   - one or two flows per configured ACL at priority
 *     acl->priority + OVN_ACL_PRI_OFFSET;
 *   - priority-34000 flows letting locally generated DHCPv4/v6 replies
 *     reach each port that has DHCP options configured. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiater's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;"
         *
         * We also check for established connections that have ct_label.blocked
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label.blocked=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port.  */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")
            || !strcmp(acl->action, "allow-related")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiater's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            if (!has_stateful) {
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "next;");
            } else {
                struct ds match = DS_EMPTY_INITIALIZER;

                /* Commit the connection tracking entry if it's a new
                 * connection that matches this ACL.  After this commit,
                 * the reply traffic is allowed by a flow we create at
                 * priority 65535, defined earlier.
                 *
                 * It's also possible that a known connection was marked for
                 * deletion after a policy was deleted, but the policy was
                 * re-added while that connection is still known.  We catch
                 * that case here and un-set ct_label.blocked (which will be done
                 * by ct_commit in the "stateful" stage) to indicate that the
                 * connection should be allowed to resume.
                 */
                ds_put_format(&match, "((ct.new && !ct.est)"
                                      " || (!ct.new && ct.est && !ct.rpl "
                                      "&& ct_label.blocked == 1)) "
                                      "&& (%s)", acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match),
                               REGBIT_CONNTRACK_COMMIT" = 1; next;");

                /* Match on traffic in the request direction for an established
                 * connection tracking entry that has not been marked for
                 * deletion.  There is no need to commit here, so we can just
                 * proceed to the next table. We use this to ensure that this
                 * connection is still allowed by the currently defined
                 * policy. */
                ds_clear(&match);
                ds_put_format(&match,
                              "!ct.new && ct.est && !ct.rpl"
                              " && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "next;");

                ds_destroy(&match);
            }
        } else if (!strcmp(acl->action, "drop")
                   || !strcmp(acl->action, "reject")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* XXX Need to support "reject", treat it as "drop;" for now. */
            if (!strcmp(acl->action, "reject")) {
                VLOG_INFO("reject is not a supported action");
            }

            /* The implementation of "drop" differs if stateful ACLs are in
             * use for this datapath.  In that case, the actions differ
             * depending on whether the connection was previously committed
             * to the connection tracker with ct_commit. */
            if (has_stateful) {
                /* If the packet is not part of an established connection, then
                 * we can simply drop it. */
                ds_put_format(&match,
                              "(!ct.est || (ct.est && ct_label.blocked == 1)) "
                              "&& (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage, acl->priority +
                              OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");

                /* For an existing connection without ct_label set, we've
                 * encountered a policy change.  ACLs previously allowed
                 * this connection and we committed the connection tracking
                 * entry.  Current policy says that we should drop this
                 * connection.  First, we set bit 0 of ct_label to indicate
                 * that this connection is set for deletion.  By not
                 * specifying "next;", we implicitly drop the packet after
                 * updating conntrack state.  We would normally defer
                 * ct_commit() to the "stateful" stage, but since we're
                 * dropping the packet, we go ahead and do it here. */
                ds_clear(&match);
                ds_put_format(&match,
                              "ct.est && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "ct_commit(ct_label=1/1);");

                ds_destroy(&match);
            } else {
                /* There are no stateful ACLs in use on this datapath,
                 * so a "drop" ACL is simply the "drop" logical flow action
                 * in all cases. */
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "drop;");
                ds_destroy(&match);
            }
        }
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4 options*/
    for (size_t i = 0; i < od->nbs->n_ports; i++) {
        if (od->nbs->ports[i]->dhcpv4_options) {
            const char *server_id = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
            const char *lease_time = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
            const char *router = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "router");
            if (server_id && server_mac && lease_time && router) {
                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions =
                    has_stateful ? "ct_commit; next;" : "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip4.src == %s && udp && udp.src == 67 "
                              "&& udp.dst == 68", od->nbs->ports[i]->name,
                              server_mac, server_id);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }

        if (od->nbs->ports[i]->dhcpv6_options) {
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
            struct eth_addr ea;
            if (server_mac && eth_addr_from_string(server_mac, &ea)) {
                /* Get the link local IP of the DHCPv6 server from the
                 * server MAC. */
                struct in6_addr lla;
                in6_generate_lla(ea, &lla);

                char server_ip[INET6_ADDRSTRLEN + 1];
                ipv6_string_mapped(server_ip, &lla);

                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions = has_stateful ? "ct_commit; next;" :
                    "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip6.src == %s && udp && udp.src == 547 "
                              "&& udp.dst == 546", od->nbs->ports[i]->name,
                              server_mac, server_ip);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }
    }
}
2634
1a03fc7d
BS
2635static void
2636build_qos(struct ovn_datapath *od, struct hmap *lflows) {
2637 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
2638 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
2639
2640 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
2641 struct nbrec_qos *qos = od->nbs->qos_rules[i];
2642 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
2643 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
2644
2645 if (!strcmp(qos->key_action, "dscp")) {
2646 struct ds dscp_action = DS_EMPTY_INITIALIZER;
2647
2648 ds_put_format(&dscp_action, "ip.dscp = %d; next;",
2649 (uint8_t)qos->value_action);
2650 ovn_lflow_add(lflows, od, stage,
2651 qos->priority,
2652 qos->match, ds_cstr(&dscp_action));
2653 ds_destroy(&dscp_action);
2654 }
2655 }
2656}
2657
7a15be69
GS
2658static void
2659build_lb(struct ovn_datapath *od, struct hmap *lflows)
2660{
2661 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
2662 * default. */
2663 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
2664 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
2665
2666 if (od->nbs->load_balancer) {
2667 /* Ingress and Egress LB Table (Priority 65535).
2668 *
2669 * Send established traffic through conntrack for just NAT. */
2670 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
2671 "ct.est && !ct.rel && !ct.new && !ct.inv",
2672 REGBIT_CONNTRACK_NAT" = 1; next;");
2673 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
2674 "ct.est && !ct.rel && !ct.new && !ct.inv",
2675 REGBIT_CONNTRACK_NAT" = 1; next;");
2676 }
2677}
2678
/* Builds the ingress and egress stateful stages for datapath 'od':
 * commits flagged connections to conntrack, sends flagged established
 * traffic through ct_lb for NAT, and adds per-VIP load-balancing flows
 * (priority 120 with an L4 port, 110 without). */
static void
build_stateful(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Ingress and Egress stateful Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");

    /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
     * committed to conntrack. We always set ct_label.blocked to 0 here as
     * any packet that makes it this far is part of a connection we
     * want to allow to continue. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");

    /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
     * through nat (without committing).
     *
     * REGBIT_CONNTRACK_COMMIT is set for new connections and
     * REGBIT_CONNTRACK_NAT is set for established connections. So they
     * don't overlap.
     */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");

    /* Load balancing rules for new connections get committed to conntrack
     * table.  So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
     * a higher priority rule for load balancing below also commits the
     * connection, so it is okay if we do not hit the above match on
     * REGBIT_CONNTRACK_COMMIT. */
    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
        struct smap *vips = &lb->vips;
        struct smap_node *node;

        SMAP_FOR_EACH (node, vips) {
            uint16_t port = 0;

            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
            if (!ip_address) {
                /* Malformed key; it has already been warned about. */
                continue;
            }

            /* New connections in Ingress table. */
            char *action = xasprintf("ct_lb(%s);", node->value);
            struct ds match = DS_EMPTY_INITIALIZER;
            ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
            if (port) {
                /* With an L4 port, match it too (priority 120, above the
                 * port-less VIP flows at 110).  Default protocol is tcp. */
                if (lb->protocol && !strcmp(lb->protocol, "udp")) {
                    ds_put_format(&match, " && udp.dst == %d", port);
                } else {
                    ds_put_format(&match, " && tcp.dst == %d", port);
                }
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              120, ds_cstr(&match), action);
            } else {
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              110, ds_cstr(&match), action);
            }

            free(ip_address);
            ds_destroy(&match);
            free(action);
        }
    }
}
2751
/* Builds the logical flows (into 'lflows') and multicast groups (into
 * 'mcgroups') for every logical *switch* datapath in 'datapaths' and every
 * logical switch port in 'ports'.  Datapaths/ports that belong to logical
 * routers (no 'nbs'/'nbsp' row) are skipped in each loop below. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Scratch buffers reused (via ds_clear()) across all the loops below
     * and destroyed once at the end. */
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 through 9.  Egress tables 0 through 6. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        build_pre_acls(od, lflows);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     * (priority 50).
     * Ingress table 1: Ingress port security - IP (priority 90 and 80)
     * Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!lsp_is_enabled(op->nbsp)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        ds_clear(&match);
        ds_clear(&actions);
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        /* If the southbound port has a qdisc queue configured, tag packets
         * from this port with that queue before continuing. */
        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
     * ports. (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!strcmp(op->nbsp->type, "localnet")) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    /* Ingress table 10: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         * - port is up or
         * - port type is router
         */
        if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to an ARP request from the port that owns the
                 * address (otherwise a DHCP client that ARPs to check for a
                 * duplicate address will fail).  Instead, forward it the usual
                 * way.
                 *
                 * (Another alternative would be to simply drop the packet.  If
                 * everything is working as it is configured, then this would
                 * produce equivalent results, since no one should reply to the
                 * request.  But ARPing for one's own IP address is intended to
                 * detect situations where the network is not working as
                 * configured, so dropping the request would frustrate that
                 * intent.) */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }

            /* For ND solicitations, we need to listen for both the
             * unicast IPv6 address and its all-nodes multicast address,
             * but always respond with the unicast IPv6 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match,
                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);

                ds_clear(&actions);
                ds_put_format(&actions,
                        "nd_na { "
                        "eth.src = %s; "
                        "ip6.src = %s; "
                        "nd.target = %s; "
                        "nd.tll = %s; "
                        "outport = inport; "
                        "flags.loopback = 1; "
                        "output; "
                        "};",
                        op->lsp_addrs[i].ea_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ea_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to a solicitation from the port that owns the
                 * address (otherwise DAD detection will fail). */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    /* Ingress table 10: ARP/ND responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Logical switch ingress table 11 and 12: DHCP options and response
     * priority 100 flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
            /* Don't add the DHCP flows if the port is not enabled or if the
             * port is a router port. */
            continue;
        }

        if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
            /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
             */
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Only the first IPv4/IPv6 address that yields a valid DHCP
             * action is used per address set ("break" below). */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action)) {
                    struct ds match = DS_EMPTY_INITIALIZER;
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
                     * put_dhcp_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }

            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv6_action(
                        op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
                        &options_action, &response_action)) {
                    struct ds match = DS_EMPTY_INITIALIZER;
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s"
                        " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
                        " udp.dst == 547", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
                                  ds_cstr(&match), ds_cstr(&options_action));

                    /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
                     * put_dhcpv6_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
                                  ds_cstr(&match), ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }
        }
    }

    /* Ingress table 11 and 12: DHCP options and response, by default goto next.
     * (priority 0). */

    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
    }

    /* Ingress table 13: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            /* Addresses are owned by the logical port.
             * Ethernet address followed by zero or more IPv4
             * or IPv6 addresses (or both). */
            struct eth_addr mac;
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                /* "dynamic" address: use the MAC that northd assigned in
                 * dynamic_addresses, if one has been assigned yet. */
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress tables 6: Egress port security - IP (priority 0)
     * Egress table 7: Egress port security L2 - multicast/broadcast
     *                 (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 6: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 7: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }

    ds_destroy(&match);
    ds_destroy(&actions);
}
eb00399e 3184
9975d7be
BP
3185static bool
3186lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
3187{
3188 return !lrport->enabled || *lrport->enabled;
3189}
3190
4685e523
JP
3191/* Returns a string of the IP address of the router port 'op' that
3192 * overlaps with 'ip_s". If one is not found, returns NULL.
3193 *
3194 * The caller must not free the returned string. */
3195static const char *
3196find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
3197{
6fdb7cd6 3198 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4685e523 3199
6fdb7cd6
JP
3200 if (is_ipv4) {
3201 ovs_be32 ip;
4685e523 3202
6fdb7cd6
JP
3203 if (!ip_parse(ip_s, &ip)) {
3204 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3205 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
3206 return NULL;
3207 }
4685e523 3208
6fdb7cd6
JP
3209 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3210 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
3211
3212 if (!((na->network ^ ip) & na->mask)) {
3213 /* There should be only 1 interface that matches the
3214 * supplied IP. Otherwise, it's a configuration error,
3215 * because subnets of a router's interfaces should NOT
3216 * overlap. */
3217 return na->addr_s;
3218 }
3219 }
3220 } else {
3221 struct in6_addr ip6;
3222
3223 if (!ipv6_parse(ip_s, &ip6)) {
3224 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3225 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
3226 return NULL;
3227 }
3228
3229 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3230 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
3231 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
3232 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
3233
3234 if (ipv6_is_zero(&and_addr)) {
3235 /* There should be only 1 interface that matches the
3236 * supplied IP. Otherwise, it's a configuration error,
3237 * because subnets of a router's interfaces should NOT
3238 * overlap. */
3239 return na->addr_s;
3240 }
4685e523
JP
3241 }
3242 }
3243
3244 return NULL;
3245}
3246
9975d7be 3247static void
0bac7164 3248add_route(struct hmap *lflows, const struct ovn_port *op,
4685e523 3249 const char *lrp_addr_s, const char *network_s, int plen,
440a9f4b 3250 const char *gateway, const char *policy)
9975d7be 3251{
6fdb7cd6 3252 bool is_ipv4 = strchr(network_s, '.') ? true : false;
a63f7235 3253 struct ds match = DS_EMPTY_INITIALIZER;
440a9f4b
GS
3254 const char *dir;
3255 uint16_t priority;
3256
3257 if (policy && !strcmp(policy, "src-ip")) {
3258 dir = "src";
3259 priority = plen * 2;
3260 } else {
3261 dir = "dst";
3262 priority = (plen * 2) + 1;
3263 }
6fdb7cd6 3264
a63f7235
JP
3265 /* IPv6 link-local addresses must be scoped to the local router port. */
3266 if (!is_ipv4) {
3267 struct in6_addr network;
3268 ovs_assert(ipv6_parse(network_s, &network));
3269 if (in6_is_lla(&network)) {
3270 ds_put_format(&match, "inport == %s && ", op->json_key);
3271 }
3272 }
440a9f4b 3273 ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
a63f7235 3274 network_s, plen);
9975d7be
BP
3275
3276 struct ds actions = DS_EMPTY_INITIALIZER;
6fdb7cd6
JP
3277 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
3278
9975d7be 3279 if (gateway) {
c9bdf7bd 3280 ds_put_cstr(&actions, gateway);
9975d7be 3281 } else {
6fdb7cd6 3282 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
9975d7be 3283 }
4685e523 3284 ds_put_format(&actions, "; "
6fdb7cd6 3285 "%sreg1 = %s; "
4685e523 3286 "eth.src = %s; "
0bac7164 3287 "outport = %s; "
bf143492 3288 "flags.loopback = 1; "
0bac7164 3289 "next;",
6fdb7cd6 3290 is_ipv4 ? "" : "xx",
4685e523
JP
3291 lrp_addr_s,
3292 op->lrp_networks.ea_s,
3293 op->json_key);
9975d7be
BP
3294
3295 /* The priority here is calculated to implement longest-prefix-match
3296 * routing. */
440a9f4b 3297 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
a63f7235
JP
3298 ds_cstr(&match), ds_cstr(&actions));
3299 ds_destroy(&match);
9975d7be 3300 ds_destroy(&actions);
9975d7be
BP
3301}
3302
28dc3fe9
SR
3303static void
3304build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
3305 struct hmap *ports,
3306 const struct nbrec_logical_router_static_route *route)
3307{
6fdb7cd6 3308 ovs_be32 nexthop;
4685e523 3309 const char *lrp_addr_s;
6fdb7cd6
JP
3310 unsigned int plen;
3311 bool is_ipv4;
28dc3fe9 3312
6fdb7cd6
JP
3313 /* Verify that the next hop is an IP address with an all-ones mask. */
3314 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
3315 if (!error) {
3316 if (plen != 32) {
3317 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3318 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3319 return;
3320 }
3321 is_ipv4 = true;
3322 } else {
28dc3fe9 3323 free(error);
6fdb7cd6
JP
3324
3325 struct in6_addr ip6;
3326 char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
3327 if (!error) {
3328 if (plen != 128) {
3329 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3330 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3331 return;
3332 }
3333 is_ipv4 = false;
3334 } else {
3335 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3336 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
3337 free(error);
3338 return;
3339 }
28dc3fe9
SR
3340 }
3341
6fdb7cd6
JP
3342 char *prefix_s;
3343 if (is_ipv4) {
3344 ovs_be32 prefix;
3345 /* Verify that ip prefix is a valid IPv4 address. */
3346 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
3347 if (error) {
3348 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3349 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3350 route->ip_prefix);
3351 free(error);
3352 return;
3353 }
3354 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
3355 } else {
3356 /* Verify that ip prefix is a valid IPv6 address. */
3357 struct in6_addr prefix;
3358 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
3359 if (error) {
3360 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3361 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3362 route->ip_prefix);
3363 free(error);
3364 return;
3365 }
3366 struct in6_addr mask = ipv6_create_mask(plen);
3367 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
3368 prefix_s = xmalloc(INET6_ADDRSTRLEN);
3369 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
28dc3fe9
SR
3370 }
3371
3372 /* Find the outgoing port. */
3373 struct ovn_port *out_port = NULL;
3374 if (route->output_port) {
3375 out_port = ovn_port_find(ports, route->output_port);
3376 if (!out_port) {
3377 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3378 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
3379 route->output_port, route->ip_prefix);
6fdb7cd6 3380 goto free_prefix_s;
28dc3fe9 3381 }
4685e523 3382 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
28dc3fe9
SR
3383 } else {
3384 /* output_port is not specified, find the
3385 * router port matching the next hop. */
3386 int i;
3387 for (i = 0; i < od->nbr->n_ports; i++) {
3388 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
3389 out_port = ovn_port_find(ports, lrp->name);
3390 if (!out_port) {
3391 /* This should not happen. */
3392 continue;
3393 }
3394
4685e523
JP
3395 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3396 if (lrp_addr_s) {
28dc3fe9
SR
3397 break;
3398 }
3399 }
28dc3fe9
SR
3400 }
3401
4685e523
JP
3402 if (!lrp_addr_s) {
3403 /* There is no matched out port. */
3404 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3405 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
3406 route->ip_prefix, route->nexthop);
6fdb7cd6 3407 goto free_prefix_s;
4685e523
JP
3408 }
3409
440a9f4b
GS
3410 char *policy = route->policy ? route->policy : "dst-ip";
3411 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
3412 policy);
6fdb7cd6
JP
3413
3414free_prefix_s:
c9bdf7bd 3415 free(prefix_s);
28dc3fe9
SR
3416}
3417
4685e523 3418static void
6fdb7cd6 3419op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4685e523
JP
3420{
3421 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
3422 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
3423 return;
3424 }
3425
3426 ds_put_cstr(ds, "{");
3427 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3428 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
3429 if (add_bcast) {
3430 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
3431 }
3432 }
3433 ds_chomp(ds, ' ');
3434 ds_chomp(ds, ',');
3435 ds_put_cstr(ds, "}");
3436}
3437
6fdb7cd6
JP
3438static void
3439op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
3440{
3441 if (op->lrp_networks.n_ipv6_addrs == 1) {
3442 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
3443 return;
3444 }
3445
3446 ds_put_cstr(ds, "{");
3447 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3448 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
3449 }
3450 ds_chomp(ds, ' ');
3451 ds_chomp(ds, ',');
3452 ds_put_cstr(ds, "}");
3453}
3454
9975d7be
BP
3455static void
3456build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
3457 struct hmap *lflows)
3458{
3459 /* This flow table structure is documented in ovn-northd(8), so please
3460 * update ovn-northd.8.xml if you change anything. */
3461
09b39248
JP
3462 struct ds match = DS_EMPTY_INITIALIZER;
3463 struct ds actions = DS_EMPTY_INITIALIZER;
3464
9975d7be
BP
3465 /* Logical router ingress table 0: Admission control framework. */
3466 struct ovn_datapath *od;
3467 HMAP_FOR_EACH (od, key_node, datapaths) {
3468 if (!od->nbr) {
3469 continue;
3470 }
3471
3472 /* Logical VLANs not supported.
3473 * Broadcast/multicast source address is invalid. */
3474 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
3475 "vlan.present || eth.src[40]", "drop;");
3476 }
3477
3478 /* Logical router ingress table 0: match (priority 50). */
3479 struct ovn_port *op;
3480 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3481 if (!op->nbrp) {
9975d7be
BP
3482 continue;
3483 }
3484
0ee00741 3485 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
3486 /* Drop packets from disabled logical ports (since logical flow
3487 * tables are default-drop). */
3488 continue;
3489 }
3490
09b39248 3491 ds_clear(&match);
4685e523
JP
3492 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
3493 op->lrp_networks.ea_s, op->json_key);
9975d7be 3494 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 3495 ds_cstr(&match), "next;");
9975d7be
BP
3496 }
3497
3498 /* Logical router ingress table 1: IP Input. */
78aab811 3499 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3500 if (!od->nbr) {
3501 continue;
3502 }
3503
3504 /* L3 admission control: drop multicast and broadcast source, localhost
3505 * source or destination, and zero network source or destination
3506 * (priority 100). */
3507 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
3508 "ip4.mcast || "
3509 "ip4.src == 255.255.255.255 || "
3510 "ip4.src == 127.0.0.0/8 || "
3511 "ip4.dst == 127.0.0.0/8 || "
3512 "ip4.src == 0.0.0.0/8 || "
3513 "ip4.dst == 0.0.0.0/8",
3514 "drop;");
3515
0bac7164
BP
3516 /* ARP reply handling. Use ARP replies to populate the logical
3517 * router's ARP table. */
3518 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
3519 "put_arp(inport, arp.spa, arp.sha);");
3520
9975d7be
BP
3521 /* Drop Ethernet local broadcast. By definition this traffic should
3522 * not be forwarded.*/
3523 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
3524 "eth.bcast", "drop;");
3525
9975d7be
BP
3526 /* TTL discard.
3527 *
3528 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
3529 ds_clear(&match);
3530 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
3531 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
3532 ds_cstr(&match), "drop;");
9975d7be 3533
c34a87b6
JP
3534 /* ND advertisement handling. Use advertisements to populate
3535 * the logical router's ARP/ND table. */
3536 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
3537 "put_nd(inport, nd.target, nd.tll);");
3538
3539 /* Lean from neighbor solicitations that were not directed at
3540 * us. (A priority-90 flow will respond to requests to us and
3541 * learn the sender's mac address. */
3542 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
3543 "put_nd(inport, ip6.src, nd.sll);");
3544
9975d7be
BP
3545 /* Pass other traffic not already handled to the next table for
3546 * routing. */
3547 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
3548 }
3549
6fdb7cd6 3550 /* Logical router ingress table 1: IP Input for IPv4. */
9975d7be 3551 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3552 if (!op->nbrp) {
9975d7be
BP
3553 continue;
3554 }
3555
9975d7be 3556
6fdb7cd6
JP
3557 if (op->lrp_networks.n_ipv4_addrs) {
3558 /* L3 admission control: drop packets that originate from an
3559 * IPv4 address owned by the router or a broadcast address
3560 * known to the router (priority 100). */
3561 ds_clear(&match);
3562 ds_put_cstr(&match, "ip4.src == ");
3563 op_put_v4_networks(&match, op, true);
3564 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3565 ds_cstr(&match), "drop;");
3566
3567 /* ICMP echo reply. These flows reply to ICMP echo requests
3568 * received for the router's IP address. Since packets only
3569 * get here as part of the logical router datapath, the inport
3570 * (i.e. the incoming locally attached net) does not matter.
3571 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
3572 ds_clear(&match);
3573 ds_put_cstr(&match, "ip4.dst == ");
3574 op_put_v4_networks(&match, op, false);
3575 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
3576
3577 ds_clear(&actions);
3578 ds_put_format(&actions,
3579 "ip4.dst <-> ip4.src; "
3580 "ip.ttl = 255; "
3581 "icmp4.type = 0; "
bf143492 3582 "flags.loopback = 1; "
6fdb7cd6
JP
3583 "next; ");
3584 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3585 ds_cstr(&match), ds_cstr(&actions));
3586 }
dd7652e6 3587
9975d7be
BP
3588 /* ARP reply. These flows reply to ARP requests for the router's own
3589 * IP address. */
4685e523
JP
3590 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3591 ds_clear(&match);
3592 ds_put_format(&match,
3593 "inport == %s && arp.tpa == %s && arp.op == 1",
3594 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
3595
3596 ds_clear(&actions);
3597 ds_put_format(&actions,
3598 "eth.dst = eth.src; "
3599 "eth.src = %s; "
3600 "arp.op = 2; /* ARP reply */ "
3601 "arp.tha = arp.sha; "
3602 "arp.sha = %s; "
3603 "arp.tpa = arp.spa; "
3604 "arp.spa = %s; "
3605 "outport = %s; "
bf143492 3606 "flags.loopback = 1; "
4685e523
JP
3607 "output;",
3608 op->lrp_networks.ea_s,
3609 op->lrp_networks.ea_s,
3610 op->lrp_networks.ipv4_addrs[i].addr_s,
3611 op->json_key);
3612 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3613 ds_cstr(&match), ds_cstr(&actions));
3614 }
9975d7be 3615
cc4583aa
GS
3616 /* A set to hold all load-balancer vips that need ARP responses. */
3617 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3618
3619 for (int i = 0; i < op->od->nbr->n_load_balancer; i++) {
3620 struct nbrec_load_balancer *lb = op->od->nbr->load_balancer[i];
3621 struct smap *vips = &lb->vips;
3622 struct smap_node *node;
3623
3624 SMAP_FOR_EACH (node, vips) {
3625 /* node->key contains IP:port or just IP. */
3626 char *ip_address = NULL;
3627 uint16_t port;
3628
3629 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3630 if (!ip_address) {
3631 continue;
3632 }
3633
3634 if (!sset_contains(&all_ips, ip_address)) {
3635 sset_add(&all_ips, ip_address);
3636 }
3637
3638 free(ip_address);
3639 }
3640 }
3641
3642 const char *ip_address;
3643 SSET_FOR_EACH(ip_address, &all_ips) {
3644 ovs_be32 ip;
3645 if (!ip_parse(ip_address, &ip) || !ip) {
3646 continue;
3647 }
3648
3649 ds_clear(&match);
3650 ds_put_format(&match,
3651 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3652 op->json_key, IP_ARGS(ip));
3653
3654 ds_clear(&actions);
3655 ds_put_format(&actions,
3656 "eth.dst = eth.src; "
3657 "eth.src = %s; "
3658 "arp.op = 2; /* ARP reply */ "
3659 "arp.tha = arp.sha; "
3660 "arp.sha = %s; "
3661 "arp.tpa = arp.spa; "
3662 "arp.spa = "IP_FMT"; "
3663 "outport = %s; "
3664 "flags.loopback = 1; "
3665 "output;",
3666 op->lrp_networks.ea_s,
3667 op->lrp_networks.ea_s,
3668 IP_ARGS(ip),
3669 op->json_key);
3670 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3671 ds_cstr(&match), ds_cstr(&actions));
3672 }
3673
3674 sset_destroy(&all_ips);
3675
dde5ea7b
GS
3676 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
3677 size_t n_snat_ips = 0;
de297547
GS
3678 for (int i = 0; i < op->od->nbr->n_nat; i++) {
3679 const struct nbrec_nat *nat;
3680
3681 nat = op->od->nbr->nat[i];
3682
de297547
GS
3683 ovs_be32 ip;
3684 if (!ip_parse(nat->external_ip, &ip) || !ip) {
3685 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
dde5ea7b 3686 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
de297547
GS
3687 "for router %s", nat->external_ip, op->key);
3688 continue;
3689 }
3690
dde5ea7b
GS
3691 if (!strcmp(nat->type, "snat")) {
3692 snat_ips[n_snat_ips++] = ip;
3693 continue;
3694 }
3695
3696 /* ARP handling for external IP addresses.
3697 *
3698 * DNAT IP addresses are external IP addresses that need ARP
3699 * handling. */
09b39248
JP
3700 ds_clear(&match);
3701 ds_put_format(&match,
3702 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3703 op->json_key, IP_ARGS(ip));
4685e523 3704
09b39248
JP
3705 ds_clear(&actions);
3706 ds_put_format(&actions,
de297547 3707 "eth.dst = eth.src; "
4685e523 3708 "eth.src = %s; "
de297547
GS
3709 "arp.op = 2; /* ARP reply */ "
3710 "arp.tha = arp.sha; "
4685e523 3711 "arp.sha = %s; "
de297547
GS
3712 "arp.tpa = arp.spa; "
3713 "arp.spa = "IP_FMT"; "
3714 "outport = %s; "
bf143492 3715 "flags.loopback = 1; "
de297547 3716 "output;",
4685e523
JP
3717 op->lrp_networks.ea_s,
3718 op->lrp_networks.ea_s,
de297547
GS
3719 IP_ARGS(ip),
3720 op->json_key);
3721 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 3722 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3723 }
3724
4685e523
JP
3725 ds_clear(&match);
3726 ds_put_cstr(&match, "ip4.dst == {");
3727 bool has_drop_ips = false;
3728 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
49da9ec0 3729 bool snat_ip_is_router_ip = false;
dde5ea7b
GS
3730 for (int j = 0; j < n_snat_ips; j++) {
3731 /* Packets to SNAT IPs should not be dropped. */
3732 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
49da9ec0
CSV
3733 snat_ip_is_router_ip = true;
3734 break;
4685e523 3735 }
4ef48e9d 3736 }
49da9ec0
CSV
3737 if (snat_ip_is_router_ip) {
3738 continue;
3739 }
4685e523
JP
3740 ds_put_format(&match, "%s, ",
3741 op->lrp_networks.ipv4_addrs[i].addr_s);
3742 has_drop_ips = true;
4ef48e9d 3743 }
4685e523
JP
3744 ds_chomp(&match, ' ');
3745 ds_chomp(&match, ',');
3746 ds_put_cstr(&match, "}");
4ef48e9d 3747
4685e523
JP
3748 if (has_drop_ips) {
3749 /* Drop IP traffic to this router. */
09b39248
JP
3750 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3751 ds_cstr(&match), "drop;");
4ef48e9d 3752 }
4685e523 3753
dde5ea7b 3754 free(snat_ips);
9975d7be
BP
3755 }
3756
6fdb7cd6
JP
3757 /* Logical router ingress table 1: IP Input for IPv6. */
3758 HMAP_FOR_EACH (op, key_node, ports) {
3759 if (!op->nbrp) {
3760 continue;
3761 }
3762
3763 if (op->lrp_networks.n_ipv6_addrs) {
3764 /* L3 admission control: drop packets that originate from an
3765 * IPv6 address owned by the router (priority 100). */
3766 ds_clear(&match);
3767 ds_put_cstr(&match, "ip6.src == ");
3768 op_put_v6_networks(&match, op);
3769 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3770 ds_cstr(&match), "drop;");
3771
3772 /* ICMPv6 echo reply. These flows reply to echo requests
3773 * received for the router's IP address. */
3774 ds_clear(&match);
3775 ds_put_cstr(&match, "ip6.dst == ");
3776 op_put_v6_networks(&match, op);
3777 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
3778
3779 ds_clear(&actions);
3780 ds_put_cstr(&actions,
3781 "ip6.dst <-> ip6.src; "
3782 "ip.ttl = 255; "
3783 "icmp6.type = 129; "
bf143492 3784 "flags.loopback = 1; "
6fdb7cd6
JP
3785 "next; ");
3786 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3787 ds_cstr(&match), ds_cstr(&actions));
3788
3789 /* Drop IPv6 traffic to this router. */
3790 ds_clear(&match);
3791 ds_put_cstr(&match, "ip6.dst == ");
3792 op_put_v6_networks(&match, op);
3793 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3794 ds_cstr(&match), "drop;");
3795 }
3796
3797 /* ND reply. These flows reply to ND solicitations for the
3798 * router's own IP address. */
3799 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3800 ds_clear(&match);
3801 ds_put_format(&match,
3802 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
3803 "&& nd.target == %s",
3804 op->json_key,
3805 op->lrp_networks.ipv6_addrs[i].addr_s,
3806 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
3807 op->lrp_networks.ipv6_addrs[i].addr_s);
3808
3809 ds_clear(&actions);
3810 ds_put_format(&actions,
c34a87b6 3811 "put_nd(inport, ip6.src, nd.sll); "
6fdb7cd6
JP
3812 "nd_na { "
3813 "eth.src = %s; "
3814 "ip6.src = %s; "
3815 "nd.target = %s; "
3816 "nd.tll = %s; "
3817 "outport = inport; "
bf143492 3818 "flags.loopback = 1; "
6fdb7cd6
JP
3819 "output; "
3820 "};",
3821 op->lrp_networks.ea_s,
3822 op->lrp_networks.ipv6_addrs[i].addr_s,
3823 op->lrp_networks.ipv6_addrs[i].addr_s,
3824 op->lrp_networks.ea_s);
3825 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3826 ds_cstr(&match), ds_cstr(&actions));
3827 }
3828 }
3829
cc4583aa 3830 /* NAT, Defrag and load balancing in Gateway routers. */
de297547
GS
3831 HMAP_FOR_EACH (od, key_node, datapaths) {
3832 if (!od->nbr) {
3833 continue;
3834 }
3835
3836 /* Packets are allowed by default. */
cc4583aa 3837 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
de297547
GS
3838 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
3839 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
3840 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
3841
cc4583aa
GS
3842 /* NAT rules, packet defrag and load balancing are only valid on
3843 * Gateway routers. */
de297547
GS
3844 if (!smap_get(&od->nbr->options, "chassis")) {
3845 continue;
3846 }
3847
cc4583aa
GS
3848 /* A set to hold all ips that need defragmentation and tracking. */
3849 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3850
3851 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
3852 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
3853 struct smap *vips = &lb->vips;
3854 struct smap_node *node;
3855
3856 SMAP_FOR_EACH (node, vips) {
3857 uint16_t port = 0;
3858
3859 /* node->key contains IP:port or just IP. */
3860 char *ip_address = NULL;
3861 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3862 if (!ip_address) {
3863 continue;
3864 }
3865
3866 if (!sset_contains(&all_ips, ip_address)) {
3867 sset_add(&all_ips, ip_address);
3868 }
3869
3870 /* Higher priority rules are added in DNAT table to match on
3871 * ct.new which in-turn have group id as an action for load
3872 * balancing. */
3873 ds_clear(&actions);
3874 ds_put_format(&actions, "ct_lb(%s);", node->value);
3875
3876 ds_clear(&match);
3877 ds_put_format(&match, "ct.new && ip && ip4.dst == %s",
3878 ip_address);
3879 free(ip_address);
3880
3881 if (port) {
3882 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
546f1ff3
GS
3883 ds_put_format(&match, " && udp && udp.dst == %d",
3884 port);
cc4583aa 3885 } else {
546f1ff3
GS
3886 ds_put_format(&match, " && tcp && tcp.dst == %d",
3887 port);
cc4583aa
GS
3888 }
3889 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
3890 120, ds_cstr(&match), ds_cstr(&actions));
3891 } else {
3892 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
3893 110, ds_cstr(&match), ds_cstr(&actions));
3894 }
3895 }
3896 }
3897
3898 /* If there are any load balancing rules, we should send the
3899 * packet to conntrack for defragmentation and tracking. This helps
3900 * with two things.
3901 *
3902 * 1. With tracking, we can send only new connections to pick a
3903 * DNAT ip address from a group.
3904 * 2. If there are L4 ports in load balancing rules, we need the
3905 * defragmentation to match on L4 ports. */
3906 const char *ip_address;
3907 SSET_FOR_EACH(ip_address, &all_ips) {
3908 ds_clear(&match);
3909 ds_put_format(&match, "ip && ip4.dst == %s", ip_address);
3910 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
3911 100, ds_cstr(&match), "ct_next;");
3912 }
3913
3914 sset_destroy(&all_ips);
3915
de297547
GS
3916 for (int i = 0; i < od->nbr->n_nat; i++) {
3917 const struct nbrec_nat *nat;
3918
3919 nat = od->nbr->nat[i];
3920
3921 ovs_be32 ip, mask;
3922
3923 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
3924 if (error || mask != OVS_BE32_MAX) {
3925 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3926 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
3927 nat->external_ip);
3928 free(error);
3929 continue;
3930 }
3931
3932 /* Check the validity of nat->logical_ip. 'logical_ip' can
3933 * be a subnet when the type is "snat". */
3934 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
3935 if (!strcmp(nat->type, "snat")) {
3936 if (error) {
3937 static struct vlog_rate_limit rl =
3938 VLOG_RATE_LIMIT_INIT(5, 1);
3939 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
3940 "in router "UUID_FMT"",
3941 nat->logical_ip, UUID_ARGS(&od->key));
3942 free(error);
3943 continue;
3944 }
3945 } else {
3946 if (error || mask != OVS_BE32_MAX) {
3947 static struct vlog_rate_limit rl =
3948 VLOG_RATE_LIMIT_INIT(5, 1);
3949 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
3950 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
3951 free(error);
3952 continue;
3953 }
3954 }
3955
de297547
GS
3956 /* Ingress UNSNAT table: It is for already established connections'
3957 * reverse traffic. i.e., SNAT has already been done in egress
3958 * pipeline and now the packet has entered the ingress pipeline as
3959 * part of a reply. We undo the SNAT here.
3960 *
3961 * Undoing SNAT has to happen before DNAT processing. This is
3962 * because when the packet was DNATed in ingress pipeline, it did
3963 * not know about the possibility of eventual additional SNAT in
3964 * egress pipeline. */
3965 if (!strcmp(nat->type, "snat")
3966 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
3967 ds_clear(&match);
3968 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
de297547 3969 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
09b39248 3970 ds_cstr(&match), "ct_snat; next;");
de297547
GS
3971 }
3972
3973 /* Ingress DNAT table: Packets enter the pipeline with destination
3974 * IP address that needs to be DNATted from a external IP address
3975 * to a logical IP address. */
3976 if (!strcmp(nat->type, "dnat")
3977 || !strcmp(nat->type, "dnat_and_snat")) {
3978 /* Packet when it goes from the initiator to destination.
3979 * We need to zero the inport because the router can
3980 * send the packet back through the same interface. */
09b39248
JP
3981 ds_clear(&match);
3982 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
3983 ds_clear(&actions);
bf143492 3984 ds_put_format(&actions,"flags.loopback = 1; ct_dnat(%s);",
09b39248 3985 nat->logical_ip);
de297547 3986 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
09b39248 3987 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3988 }
3989
3990 /* Egress SNAT table: Packets enter the egress pipeline with
3991 * source ip address that needs to be SNATted to a external ip
3992 * address. */
3993 if (!strcmp(nat->type, "snat")
3994 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
3995 ds_clear(&match);
3996 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
3997 ds_clear(&actions);
3998 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
de297547
GS
3999
4000 /* The priority here is calculated such that the
4001 * nat->logical_ip with the longest mask gets a higher
4002 * priority. */
4003 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
09b39248
JP
4004 count_1bits(ntohl(mask)) + 1,
4005 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
4006 }
4007 }
4008
4009 /* Re-circulate every packet through the DNAT zone.
cc4583aa 4010 * This helps with three things.
de297547
GS
4011 *
4012 * 1. Any packet that needs to be unDNATed in the reverse
4013 * direction gets unDNATed. Ideally this could be done in
4014 * the egress pipeline. But since the gateway router
4015 * does not have any feature that depends on the source
4016 * ip address being external IP address for IP routing,
4017 * we can do it here, saving a future re-circulation.
4018 *
cc4583aa
GS
4019 * 2. Established load-balanced connections automatically get
4020 * DNATed.
4021 *
4022 * 3. Any packet that was sent through SNAT zone in the
de297547
GS
4023 * previous table automatically gets re-circulated to get
4024 * back the new destination IP address that is needed for
4025 * routing in the openflow pipeline. */
4026 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
bf143492 4027 "ip", "flags.loopback = 1; ct_dnat;");
de297547
GS
4028 }
4029
4f6d33f3 4030 /* Logical router ingress table 5: IP Routing.
9975d7be
BP
4031 *
4032 * A packet that arrives at this table is an IP packet that should be
6fdb7cd6
JP
4033 * routed to the address in 'ip[46].dst'. This table sets outport to
4034 * the correct output port, eth.src to the output port's MAC
4035 * address, and '[xx]reg0' to the next-hop IP address (leaving
4036 * 'ip[46].dst', the packet’s final destination, unchanged), and
4037 * advances to the next table for ARP/ND resolution. */
9975d7be 4038 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4039 if (!op->nbrp) {
9975d7be
BP
4040 continue;
4041 }
4042
4685e523
JP
4043 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4044 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
4045 op->lrp_networks.ipv4_addrs[i].network_s,
440a9f4b 4046 op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL);
4685e523 4047 }
6fdb7cd6
JP
4048
4049 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4050 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
4051 op->lrp_networks.ipv6_addrs[i].network_s,
440a9f4b 4052 op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL);
6fdb7cd6 4053 }
9975d7be 4054 }
4685e523 4055
6fdb7cd6 4056 /* Convert the static routes to flows. */
9975d7be
BP
4057 HMAP_FOR_EACH (od, key_node, datapaths) {
4058 if (!od->nbr) {
4059 continue;
4060 }
4061
28dc3fe9
SR
4062 for (int i = 0; i < od->nbr->n_static_routes; i++) {
4063 const struct nbrec_logical_router_static_route *route;
4064
4065 route = od->nbr->static_routes[i];
4066 build_static_route_flow(lflows, od, ports, route);
4067 }
9975d7be 4068 }
6fdb7cd6 4069
9975d7be
BP
4070 /* XXX destination unreachable */
4071
4f6d33f3 4072 /* Local router ingress table 6: ARP Resolution.
9975d7be
BP
4073 *
4074 * Any packet that reaches this table is an IP packet whose next-hop IP
4075 * address is in reg0. (ip4.dst is the final destination.) This table
4076 * resolves the IP address in reg0 into an output port in outport and an
4077 * Ethernet address in eth.dst. */
4078 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4079 if (op->nbrp) {
6fdb7cd6
JP
4080 /* This is a logical router port. If next-hop IP address in
4081 * '[xx]reg0' matches IP address of this router port, then
4082 * the packet is intended to eventually be sent to this
4083 * logical port. Set the destination mac address using this
4084 * port's mac address.
509afdc3
GS
4085 *
4086 * The packet is still in peer's logical pipeline. So the match
4087 * should be on peer's outport. */
6fdb7cd6
JP
4088 if (op->peer && op->nbrp->peer) {
4089 if (op->lrp_networks.n_ipv4_addrs) {
4090 ds_clear(&match);
4091 ds_put_format(&match, "outport == %s && reg0 == ",
4092 op->peer->json_key);
4093 op_put_v4_networks(&match, op, false);
4094
4095 ds_clear(&actions);
4096 ds_put_format(&actions, "eth.dst = %s; next;",
4097 op->lrp_networks.ea_s);
4098 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4099 100, ds_cstr(&match), ds_cstr(&actions));
4100 }
4685e523 4101
6fdb7cd6
JP
4102 if (op->lrp_networks.n_ipv6_addrs) {
4103 ds_clear(&match);
4104 ds_put_format(&match, "outport == %s && xxreg0 == ",
4105 op->peer->json_key);
4106 op_put_v6_networks(&match, op);
4107
4108 ds_clear(&actions);
4109 ds_put_format(&actions, "eth.dst = %s; next;",
4110 op->lrp_networks.ea_s);
4111 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4112 100, ds_cstr(&match), ds_cstr(&actions));
4113 }
509afdc3 4114 }
0ee00741 4115 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
4116 /* This is a logical switch port that backs a VM or a container.
4117 * Extract its addresses. For each of the address, go through all
4118 * the router ports attached to the switch (to which this port
4119 * connects) and if the address in question is reachable from the
6fdb7cd6 4120 * router port, add an ARP/ND entry in that router's pipeline. */
75cf9d2b 4121
e93b43d6 4122 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 4123 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 4124 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 4125 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 4126 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
4127 /* Get the Logical_Router_Port that the
4128 * Logical_Switch_Port is connected to, as
4129 * 'peer'. */
86e98048 4130 const char *peer_name = smap_get(
0ee00741 4131 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
4132 "router-port");
4133 if (!peer_name) {
4134 continue;
4135 }
4136
e93b43d6 4137 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 4138 if (!peer || !peer->nbrp) {
86e98048
BP
4139 continue;
4140 }
4141
4685e523 4142 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
4143 continue;
4144 }
4145
09b39248 4146 ds_clear(&match);
e93b43d6 4147 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
4148 peer->json_key, ip_s);
4149
09b39248 4150 ds_clear(&actions);
4685e523 4151 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 4152 ovn_lflow_add(lflows, peer->od,
09b39248
JP
4153 S_ROUTER_IN_ARP_RESOLVE, 100,
4154 ds_cstr(&match), ds_cstr(&actions));
86e98048 4155 }
9975d7be 4156 }
6fdb7cd6
JP
4157
4158 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4159 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
4160 for (size_t k = 0; k < op->od->n_router_ports; k++) {
4161 /* Get the Logical_Router_Port that the
4162 * Logical_Switch_Port is connected to, as
4163 * 'peer'. */
4164 const char *peer_name = smap_get(
4165 &op->od->router_ports[k]->nbsp->options,
4166 "router-port");
4167 if (!peer_name) {
4168 continue;
4169 }
4170
4171 struct ovn_port *peer = ovn_port_find(ports, peer_name);
4172 if (!peer || !peer->nbrp) {
4173 continue;
4174 }
4175
4176 if (!find_lrp_member_ip(peer, ip_s)) {
4177 continue;
4178 }
4179
4180 ds_clear(&match);
4181 ds_put_format(&match, "outport == %s && xxreg0 == %s",
4182 peer->json_key, ip_s);
4183
4184 ds_clear(&actions);
4185 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
4186 ovn_lflow_add(lflows, peer->od,
4187 S_ROUTER_IN_ARP_RESOLVE, 100,
4188 ds_cstr(&match), ds_cstr(&actions));
4189 }
4190 }
9975d7be 4191 }
0ee00741 4192 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
4193 /* This is a logical switch port that connects to a router. */
4194
4195 /* The peer of this switch port is the router port for which
4196 * we need to add logical flows such that it can resolve
4197 * ARP entries for all the other router ports connected to
4198 * the switch in question. */
4199
0ee00741 4200 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
4201 "router-port");
4202 if (!peer_name) {
4203 continue;
4204 }
4205
4206 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 4207 if (!peer || !peer->nbrp) {
75cf9d2b
GS
4208 continue;
4209 }
4210
4685e523 4211 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 4212 const char *router_port_name = smap_get(
0ee00741 4213 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
4214 "router-port");
4215 struct ovn_port *router_port = ovn_port_find(ports,
4216 router_port_name);
0ee00741 4217 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
4218 continue;
4219 }
4220
4221 /* Skip the router port under consideration. */
4222 if (router_port == peer) {
4223 continue;
4224 }
4225
6fdb7cd6
JP
4226 if (router_port->lrp_networks.n_ipv4_addrs) {
4227 ds_clear(&match);
4228 ds_put_format(&match, "outport == %s && reg0 == ",
4229 peer->json_key);
4230 op_put_v4_networks(&match, router_port, false);
4231
4232 ds_clear(&actions);
4233 ds_put_format(&actions, "eth.dst = %s; next;",
4234 router_port->lrp_networks.ea_s);
4235 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
4236 100, ds_cstr(&match), ds_cstr(&actions));
4237 }
4685e523 4238
6fdb7cd6
JP
4239 if (router_port->lrp_networks.n_ipv6_addrs) {
4240 ds_clear(&match);
4241 ds_put_format(&match, "outport == %s && xxreg0 == ",
4242 peer->json_key);
4243 op_put_v6_networks(&match, router_port);
4244
4245 ds_clear(&actions);
4246 ds_put_format(&actions, "eth.dst = %s; next;",
4247 router_port->lrp_networks.ea_s);
4248 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
4249 100, ds_cstr(&match), ds_cstr(&actions));
4250 }
75cf9d2b 4251 }
9975d7be
BP
4252 }
4253 }
75cf9d2b 4254
0bac7164
BP
4255 HMAP_FOR_EACH (od, key_node, datapaths) {
4256 if (!od->nbr) {
4257 continue;
4258 }
4259
c34a87b6
JP
4260 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
4261 "get_arp(outport, reg0); next;");
4262
4263 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
4264 "get_nd(outport, xxreg0); next;");
0bac7164
BP
4265 }
4266
4f6d33f3 4267 /* Local router ingress table 7: ARP request.
0bac7164
BP
4268 *
4269 * In the common case where the Ethernet destination has been resolved,
94300e09
JP
4270 * this table outputs the packet (priority 0). Otherwise, it composes
4271 * and sends an ARP request (priority 100). */
0bac7164
BP
4272 HMAP_FOR_EACH (od, key_node, datapaths) {
4273 if (!od->nbr) {
4274 continue;
4275 }
4276
4277 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
4278 "eth.dst == 00:00:00:00:00:00",
4279 "arp { "
4280 "eth.dst = ff:ff:ff:ff:ff:ff; "
4281 "arp.spa = reg1; "
47021598 4282 "arp.tpa = reg0; "
0bac7164
BP
4283 "arp.op = 1; " /* ARP request */
4284 "output; "
4285 "};");
4286 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
4287 }
9975d7be 4288
de297547 4289 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
4290 *
4291 * Priority 100 rules deliver packets to enabled logical ports. */
4292 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4293 if (!op->nbrp) {
9975d7be
BP
4294 continue;
4295 }
4296
0ee00741 4297 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
4298 /* Drop packets to disabled logical ports (since logical flow
4299 * tables are default-drop). */
4300 continue;
4301 }
4302
09b39248
JP
4303 ds_clear(&match);
4304 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 4305 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 4306 ds_cstr(&match), "output;");
9975d7be 4307 }
09b39248
JP
4308
4309 ds_destroy(&match);
4310 ds_destroy(&actions);
9975d7be
BP
4311}
4312
4313/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
4314 * constructing their contents based on the OVN_NB database. */
4315static void
4316build_lflows(struct northd_context *ctx, struct hmap *datapaths,
4317 struct hmap *ports)
4318{
4319 struct hmap lflows = HMAP_INITIALIZER(&lflows);
4320 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
4321
4322 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
4323 build_lrouter_flows(datapaths, ports, &lflows);
4324
5868eb24
BP
4325 /* Push changes to the Logical_Flow table to database. */
4326 const struct sbrec_logical_flow *sbflow, *next_sbflow;
4327 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
4328 struct ovn_datapath *od
4329 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
4330 if (!od) {
4331 sbrec_logical_flow_delete(sbflow);
4332 continue;
eb00399e 4333 }
eb00399e 4334
9975d7be 4335 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
880fcd14
BP
4336 enum ovn_pipeline pipeline
4337 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
5868eb24 4338 struct ovn_lflow *lflow = ovn_lflow_find(
880fcd14
BP
4339 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
4340 sbflow->priority, sbflow->match, sbflow->actions);
5868eb24
BP
4341 if (lflow) {
4342 ovn_lflow_destroy(&lflows, lflow);
4343 } else {
4344 sbrec_logical_flow_delete(sbflow);
4edcdcf4
RB
4345 }
4346 }
5868eb24
BP
4347 struct ovn_lflow *lflow, *next_lflow;
4348 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
880fcd14
BP
4349 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
4350 uint8_t table = ovn_stage_get_table(lflow->stage);
4351
5868eb24
BP
4352 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
4353 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
9975d7be
BP
4354 sbrec_logical_flow_set_pipeline(
4355 sbflow, pipeline == P_IN ? "ingress" : "egress");
880fcd14 4356 sbrec_logical_flow_set_table_id(sbflow, table);
5868eb24
BP
4357 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
4358 sbrec_logical_flow_set_match(sbflow, lflow->match);
4359 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
091e3af9 4360
d8026bbf
BP
4361 /* Trim the source locator lflow->where, which looks something like
4362 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
4363 * last slash, e.g. "ovn-northd.c:1234". */
4364 const char *slash = strrchr(lflow->where, '/');
4365#if _WIN32
4366 const char *backslash = strrchr(lflow->where, '\\');
4367 if (!slash || backslash > slash) {
4368 slash = backslash;
4369 }
4370#endif
4371 const char *where = slash ? slash + 1 : lflow->where;
4372
4373 const struct smap ids = SMAP_CONST2(
4374 &ids,
4375 "stage-name", ovn_stage_to_str(lflow->stage),
4376 "source", where);
aaf881c6 4377 sbrec_logical_flow_set_external_ids(sbflow, &ids);
091e3af9 4378
5868eb24 4379 ovn_lflow_destroy(&lflows, lflow);
eb00399e 4380 }
5868eb24
BP
4381 hmap_destroy(&lflows);
4382
4383 /* Push changes to the Multicast_Group table to database. */
4384 const struct sbrec_multicast_group *sbmc, *next_sbmc;
4385 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
4386 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
4387 sbmc->datapath);
4388 if (!od) {
4389 sbrec_multicast_group_delete(sbmc);
4390 continue;
4391 }
eb00399e 4392
5868eb24
BP
4393 struct multicast_group group = { .name = sbmc->name,
4394 .key = sbmc->tunnel_key };
4395 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
4396 if (mc) {
4397 ovn_multicast_update_sbrec(mc, sbmc);
4398 ovn_multicast_destroy(&mcgroups, mc);
4399 } else {
4400 sbrec_multicast_group_delete(sbmc);
4401 }
4402 }
4403 struct ovn_multicast *mc, *next_mc;
4404 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
4405 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
4406 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
4407 sbrec_multicast_group_set_name(sbmc, mc->group->name);
4408 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
4409 ovn_multicast_update_sbrec(mc, sbmc);
4410 ovn_multicast_destroy(&mcgroups, mc);
4edcdcf4 4411 }
5868eb24 4412 hmap_destroy(&mcgroups);
4edcdcf4 4413}
ea382567
RB
4414
4415/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
4416 * We always update OVN_Southbound to match the current data in
4417 * OVN_Northbound, so that the address sets used in Logical_Flows in
4418 * OVN_Southbound is checked against the proper set.*/
4419static void
4420sync_address_sets(struct northd_context *ctx)
4421{
4422 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
4423
4424 const struct sbrec_address_set *sb_address_set;
4425 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
4426 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
4427 }
4428
4429 const struct nbrec_address_set *nb_address_set;
4430 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
4431 sb_address_set = shash_find_and_delete(&sb_address_sets,
4432 nb_address_set->name);
4433 if (!sb_address_set) {
4434 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
4435 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
4436 }
4437
4438 sbrec_address_set_set_addresses(sb_address_set,
4439 /* "char **" is not compatible with "const char **" */
4440 (const char **) nb_address_set->addresses,
4441 nb_address_set->n_addresses);
4442 }
4443
4444 struct shash_node *node, *next;
4445 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
4446 sbrec_address_set_delete(node->data);
4447 shash_delete(&sb_address_sets, node);
4448 }
4449 shash_destroy(&sb_address_sets);
4450}
5868eb24 4451\f
/* One pass of the northbound-to-southbound translation: rebuilds the
 * in-memory datapath and port indexes from OVN_NB, regenerates the
 * southbound Logical_Flow/Multicast_Group and Address_Set contents, and
 * propagates nb_cfg.  No-op unless both database transactions are open. */
static void
ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }
    /* Build the working indexes, then derive all southbound state from
     * them.  Order matters: ports need datapaths, IPAM needs ports, and
     * flow generation needs all of the above. */
    struct hmap datapaths, ports;
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, &datapaths, &ports);
    build_ipam(&datapaths, &ports);
    build_lflows(ctx, &datapaths, &ports);

    sync_address_sets(ctx);

    /* The indexes are per-iteration scratch state; tear them down. */
    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        /* Singleton rows may not exist yet on first contact with a fresh
         * database; create them so the nb_cfg copy below always works. */
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    /* NOTE(review): presumably releases MAC-allocation state accumulated
     * during build_ipam() — confirm against cleanup_macam()/macam's
     * definition, which is outside this view. */
    cleanup_macam(&macam);
}
4494
fa183acc
BP
4495/* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
4496 * this column is not empty, it means we need to set the corresponding logical
4497 * port as 'up' in the northbound DB. */
ac0630a2 4498static void
fa183acc 4499update_logical_port_status(struct northd_context *ctx)
ac0630a2 4500{
fc3113bc 4501 struct hmap lports_hmap;
5868eb24 4502 const struct sbrec_port_binding *sb;
0ee00741 4503 const struct nbrec_logical_switch_port *nbsp;
fc3113bc
RB
4504
4505 struct lport_hash_node {
4506 struct hmap_node node;
0ee00741 4507 const struct nbrec_logical_switch_port *nbsp;
4ec3d7c7 4508 } *hash_node;
f93818dd 4509
fc3113bc 4510 hmap_init(&lports_hmap);
f93818dd 4511
0ee00741 4512 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
fc3113bc 4513 hash_node = xzalloc(sizeof *hash_node);
0ee00741
HK
4514 hash_node->nbsp = nbsp;
4515 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
fc3113bc
RB
4516 }
4517
5868eb24 4518 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
0ee00741 4519 nbsp = NULL;
fc3113bc 4520 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
4521 hash_string(sb->logical_port, 0),
4522 &lports_hmap) {
0ee00741
HK
4523 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
4524 nbsp = hash_node->nbsp;
fc3113bc
RB
4525 break;
4526 }
f93818dd
RB
4527 }
4528
0ee00741 4529 if (!nbsp) {
dcda6e0d 4530 /* The logical port doesn't exist for this port binding. This can
2e2762d4 4531 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 4532 * around to pruning the Port_Binding yet. */
f93818dd
RB
4533 continue;
4534 }
4535
0ee00741 4536 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
f93818dd 4537 bool up = true;
0ee00741
HK
4538 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
4539 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
f93818dd 4540 bool up = false;
0ee00741 4541 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
f93818dd
RB
4542 }
4543 }
fc3113bc 4544
4ec3d7c7 4545 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
4546 free(hash_node);
4547 }
4548 hmap_destroy(&lports_hmap);
ac0630a2 4549}
45f98d4c 4550
281977f7
NS
/* DHCPv4 options that ovn-northd supports.  These are synced into the
 * southbound DHCP_Options table (see
 * check_and_add_supported_dhcp_opts_to_sb_db()) so that ovn-controller
 * knows each option's code and type.  The option macros come from
 * ovn/lib/ovn-dhcp.h. */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
4577
33ac3c83
NS
/* DHCPv6 options that ovn-northd supports, synced into the southbound
 * DHCPv6_Options table (see
 * check_and_add_supported_dhcpv6_opts_to_sb_db()). */
static struct dhcp_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
4584
281977f7
NS
4585static void
4586check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
4587{
4588 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
4589 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
4590 sizeof(supported_dhcp_opts[0])); i++) {
4591 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
4592 dhcp_opt_hash(supported_dhcp_opts[i].name));
4593 }
4594
4595 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
4596 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4597 struct dhcp_opts_map *dhcp_opt =
4598 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
4599 if (dhcp_opt) {
4600 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
4601 } else {
4602 sbrec_dhcp_options_delete(opt_row);
4603 }
4604 }
4605
4606 struct dhcp_opts_map *opt;
4607 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
4608 struct sbrec_dhcp_options *sbrec_dhcp_option =
4609 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
4610 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
4611 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
4612 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
4613 }
4614
4615 hmap_destroy(&dhcp_opts_to_add);
4616}
4617
33ac3c83
NS
4618static void
4619check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
4620{
4621 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
4622 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
4623 sizeof(supported_dhcpv6_opts[0])); i++) {
4624 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
4625 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
4626 }
4627
4628 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
4629 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4630 struct dhcp_opts_map *dhcp_opt =
4631 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
4632 if (dhcp_opt) {
4633 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
4634 } else {
4635 sbrec_dhcpv6_options_delete(opt_row);
4636 }
4637 }
4638
4639 struct dhcp_opts_map *opt;
4640 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
4641 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
4642 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
4643 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
4644 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
4645 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
4646 }
4647
4648 hmap_destroy(&dhcpv6_opts_to_add);
4649}
4650
fa183acc
BP
4651/* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
4652static void
4653update_northbound_cfg(struct northd_context *ctx,
4654 struct ovsdb_idl_loop *sb_loop)
4655{
4656 /* Update northbound sb_cfg if appropriate. */
4657 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
4658 int64_t sb_cfg = sb_loop->cur_cfg;
4659 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
4660 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
4661 }
4662
4663 /* Update northbound hv_cfg if appropriate. */
4664 if (nbg) {
4665 /* Find minimum nb_cfg among all chassis. */
4666 const struct sbrec_chassis *chassis;
4667 int64_t hv_cfg = nbg->nb_cfg;
4668 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
4669 if (chassis->nb_cfg < hv_cfg) {
4670 hv_cfg = chassis->nb_cfg;
4671 }
4672 }
4673
4674 /* Update hv_cfg. */
4675 if (nbg->hv_cfg != hv_cfg) {
4676 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
4677 }
4678 }
4679}
4680
/* Handle a fairly small set of changes in the southbound database. */
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    /* Require an open NB transaction (we write 'up', sb_cfg, hv_cfg there)
     * and at least one complete snapshot of the SB contents. */
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}
4692\f
ac0630a2
RB
/* Parses ovn-northd's command-line options, filling in the global
 * 'ovnsb_db' and 'ovnnb_db' connection strings (falling back to the
 * defaults when the corresponding option is absent).  Exits directly for
 * --help, --options, and --version.  Daemon, vlog, and SSL options are
 * handled by the shared OVS option macros. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    /* Derive the short-option string from the table above so the two can
     * never get out of sync. */
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the standard database locations when not given. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
4762
5868eb24
BP
/* Registers 'column' for replication on 'idl' but suppresses change alerts
 * for it: ovn-northd itself writes these columns, so it does not need to be
 * woken up when they change. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
4770
ac0630a2
RB
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    /* Basic process setup: signal handling, program name, Windows service
     * glue, option parsing, and daemonization. */
    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    /* Control interface.
     * NOTE(review): 'exiting' is captured here but not initialized until
     * just before the main loop; the handler cannot run before
     * unixctl_server_run() below, so this appears safe, but initializing it
     * earlier would be clearer — confirm before changing. */
    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg and hv_cfg are written by ovn-northd itself, so don't wake up
     * for our own updates. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    /* SB_Global: we write nb_cfg ourselves. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    /* Logical_Flow: fully managed by ovn-northd. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    /* Multicast_Group: fully managed by ovn-northd. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    /* Datapath_Binding: fully managed by ovn-northd. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    /* Port_Binding: we write most columns, but 'chassis' is written by
     * ovn-controller, so alerts are wanted for it (plain add_column). */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    /* MAC_Binding. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    /* DHCP_Options / DHCPv6_Options: synced from the supported-options
     * tables by the check_and_add_* functions above. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    /* Address_Set: synced from the northbound Address_Set table. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    /* Chassis: nb_cfg is written by each chassis and read by us to compute
     * hv_cfg, so alerts are wanted. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    /* Clean shutdown. */
    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9
AW
4902
4903static void
4904ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
4905 const char *argv[] OVS_UNUSED, void *exiting_)
4906{
4907 bool *exiting = exiting_;
4908 *exiting = true;
4909
4910 unixctl_command_reply(conn, NULL);
4911}