]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
meta-flow: New functions mf_subfield_copy() and mf_subfield_swap().
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
21#include "command-line.h"
67d9b930 22#include "daemon.h"
ac0630a2 23#include "dirs.h"
3e8a2ad1 24#include "openvswitch/dynamic-string.h"
ac0630a2 25#include "fatal-signal.h"
4edcdcf4 26#include "hash.h"
ee89ea7b
TW
27#include "openvswitch/hmap.h"
28#include "openvswitch/json.h"
8b2ed684 29#include "ovn/lex.h"
281977f7 30#include "ovn/lib/ovn-dhcp.h"
e3df8838
BP
31#include "ovn/lib/ovn-nb-idl.h"
32#include "ovn/lib/ovn-sb-idl.h"
218351dd 33#include "ovn/lib/ovn-util.h"
064d7f84 34#include "packets.h"
ac0630a2 35#include "poll-loop.h"
5868eb24 36#include "smap.h"
7a15be69 37#include "sset.h"
ac0630a2
RB
38#include "stream.h"
39#include "stream-ssl.h"
7b303ff9 40#include "unixctl.h"
ac0630a2 41#include "util.h"
4edcdcf4 42#include "uuid.h"
ac0630a2
RB
43#include "openvswitch/vlog.h"
44
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* Handler for the "exit" unixctl command; defined later in this file. */
static unixctl_cb_func ovn_northd_exit;

/* Connection state shared by all of ovn-northd's database operations:
 * IDL handles and (per main-loop iteration) transaction handles for the
 * OVN Northbound and Southbound databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;
    struct ovsdb_idl *ovnsb_idl;
    struct ovsdb_idl_txn *ovnnb_txn;
    struct ovsdb_idl_txn *ovnsb_txn;
};

/* Database connection strings, taken from the command line. */
static const char *ovnnb_db;
static const char *ovnsb_db;

/* All MACs allocated by the ipam module share this OUI-style prefix
 * (0a:00:00:00:00:00); the low 24 bits are the allocatable suffix space. */
#define MAC_ADDR_PREFIX 0x0A0000000000ULL
#define MAC_ADDR_SPACE 0xffffff

/* MAC address management (macam) table of "struct eth_addr"s, that holds the
 * MAC addresses allocated by the OVN ipam module. */
static struct hmap macam = HMAP_INITIALIZER(&macam);
880fcd14
BP
65\f
/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};

/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Bit layout: | DP_TYPE (1 bit) | PIPELINE (1 bit) | TABLE (8 bits) |.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
86
/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * The PIPELINE_STAGES X-macro is the single source of truth for the stage
 * list; it is re-expanded below (and in ovn_stage_to_str() and
 * ovn_stage_to_datapath_type()) with different PIPELINE_STAGE definitions. */
enum ovn_stage {
#define PIPELINE_STAGES                                                   \
    /* Logical switch ingress stages. */                                  \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")   \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")   \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")       \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")        \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")           \
    PIPELINE_STAGE(SWITCH, IN,  LB,             7, "ls_in_lb")            \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,       8, "ls_in_stateful")      \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,     9, "ls_in_arp_rsp")       \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  10, "ls_in_dhcp_options")  \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 11, "ls_in_dhcp_response") \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       12, "ls_in_l2_lkup")       \
                                                                          \
    /* Logical switch egress stages. */                                   \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,        0, "ls_out_pre_lb")        \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,       1, "ls_out_pre_acl")       \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL,  2, "ls_out_pre_stateful")  \
    PIPELINE_STAGE(SWITCH, OUT, LB,            3, "ls_out_lb")            \
    PIPELINE_STAGE(SWITCH, OUT, ACL,           4, "ls_out_acl")           \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,      5, "ls_out_stateful")      \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,   6, "ls_out_port_sec_ip")   \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,   7, "ls_out_port_sec_l2")   \
                                                                          \
    /* Logical router ingress stages. */                                  \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")        \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")         \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,      2, "lr_in_unsnat")           \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,        3, "lr_in_dnat")             \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  4, "lr_in_ip_routing")       \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 5, "lr_in_arp_resolve")      \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 6, "lr_in_arp_request")      \
                                                                          \
    /* Logical router egress stages. */                                   \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,     0, "lr_out_snat")               \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
140
6bb4a18e
JP
/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* Register bits used to carry per-packet state between pipeline stages. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT    "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
facf8652 151
880fcd14
BP
/* Returns an "enum ovn_stage" built from the arguments.
 * (Type-safe counterpart of the OVN_STAGE_BUILD macro.) */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
                uint8_t table)
{
    return OVN_STAGE_BUILD(dp_type, pipeline, table);
}
159
160/* Returns the pipeline to which 'stage' belongs. */
161static enum ovn_pipeline
162ovn_stage_get_pipeline(enum ovn_stage stage)
163{
164 return (stage >> 8) & 1;
165}
166
167/* Returns the table to which 'stage' belongs. */
168static uint8_t
169ovn_stage_get_table(enum ovn_stage stage)
170{
171 return stage & 0xff;
172}
173
/* Returns a string name for 'stage' (e.g. "ls_in_acl"), or "<unknown>" if
 * 'stage' is not a valid stage value.  The case labels are generated from
 * the PIPELINE_STAGES X-macro. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
9a9961d2
BP
186
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added.  Aborts if 'stage' is not a valid stage value; the case labels
 * are generated from the PIPELINE_STAGES X-macro. */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: OVS_NOT_REACHED();
    }
}
880fcd14 200\f
ac0630a2
RB
/* Prints command-line usage for ovn-northd to stdout, including the
 * daemon/vlog/stream option groups contributed by the OVS libraries.
 * NOTE(review): the column alignment inside the help string may have been
 * collapsed by the source of this copy — confirm spacing against upstream. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
  (default: %s)\n\
  --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
  (default: %s)\n\
  -h, --help display this help message\n\
  -o, --options list available options\n\
  -V, --version display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
221\f
5868eb24
BP
/* One allocated tunnel ID, kept in an hmap keyed on hash_int(tnlid, 0). */
struct tnlid_node {
    struct hmap_node hmap_node;
    uint32_t tnlid;             /* The tunnel ID itself. */
};
226
227static void
228destroy_tnlids(struct hmap *tnlids)
4edcdcf4 229{
4ec3d7c7
DDP
230 struct tnlid_node *node;
231 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
232 free(node);
233 }
234 hmap_destroy(tnlids);
235}
236
237static void
238add_tnlid(struct hmap *set, uint32_t tnlid)
239{
240 struct tnlid_node *node = xmalloc(sizeof *node);
241 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
242 node->tnlid = tnlid;
4edcdcf4
RB
243}
244
4edcdcf4 245static bool
5868eb24 246tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 247{
5868eb24
BP
248 const struct tnlid_node *node;
249 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
250 if (node->tnlid == tnlid) {
251 return true;
252 }
253 }
254 return false;
255}
4edcdcf4 256
5868eb24
BP
/* Allocates an unused tunnel ID in 'set', searching in the range [1, max]
 * starting just past '*hint' and wrapping around.  On success, records the ID
 * in 'set', stores it in '*hint', and returns it; on exhaustion, logs a
 * rate-limited warning (tagged with 'name') and returns 0.
 *
 * NOTE(review): '*hint' itself is never probed, and a tnlid of 0 is never
 * generated, so if '*hint' is 0 and every ID in [1, max] is taken the loop
 * condition 'tnlid != *hint' can never become false — confirm exhaustion is
 * unreachable with *hint == 0 in practice. */
static uint32_t
allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
               uint32_t *hint)
{
    for (uint32_t tnlid = *hint + 1; tnlid != *hint;
         tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
        if (!tnlid_in_use(set, tnlid)) {
            add_tnlid(set, tnlid);
            *hint = tnlid;
            return tnlid;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
    return 0;
}
274\f
9975d7be
BP
/* An OVN logical datapath: a logical switch or a logical router, joining the
 * northbound row with its southbound Datapath_Binding.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Port tunnel keys in use (tnlid_node). */
    uint32_t port_key_hint;     /* Search hint for allocate_tnlid(). */

    /* NOTE(review): set outside this chunk; appears to track whether any
     * port has an "unknown" address — confirm against the rest of file. */
    bool has_unknown;

    /* IPAM data: IPv4 addresses allocated on this switch (ipam_node). */
    struct hmap ipam;
};

/* One MAC address allocated by ipam, kept in the global 'macam' hmap keyed
 * on hash_uint64() of the 48-bit MAC value. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};
304
8639f9be
ND
305static void
306cleanup_macam(struct hmap *macam)
307{
308 struct macam_node *node;
309 HMAP_FOR_EACH_POP (node, hmap_node, macam) {
310 free(node);
311 }
312}
313
/* One IPv4 address allocated by ipam, kept in a per-datapath hmap keyed on
 * hash_int(ip_addr, 0). */
struct ipam_node {
    struct hmap_node hmap_node;
    uint32_t ip_addr;           /* Allocated IP address (host byte order). */
};
318
319static void
320destroy_ipam(struct hmap *ipam)
321{
322 struct ipam_node *node;
323 HMAP_FOR_EACH_POP (node, hmap_node, ipam) {
324 free(node);
325 }
326 hmap_destroy(ipam);
327}
328
5868eb24
BP
329static struct ovn_datapath *
330ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
9975d7be
BP
331 const struct nbrec_logical_switch *nbs,
332 const struct nbrec_logical_router *nbr,
5868eb24
BP
333 const struct sbrec_datapath_binding *sb)
334{
335 struct ovn_datapath *od = xzalloc(sizeof *od);
336 od->key = *key;
337 od->sb = sb;
9975d7be
BP
338 od->nbs = nbs;
339 od->nbr = nbr;
5868eb24 340 hmap_init(&od->port_tnlids);
8639f9be 341 hmap_init(&od->ipam);
5868eb24
BP
342 od->port_key_hint = 0;
343 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
344 return od;
345}
346
347static void
348ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
349{
350 if (od) {
351 /* Don't remove od->list. It is used within build_datapaths() as a
352 * private list and once we've exited that function it is not safe to
353 * use it. */
354 hmap_remove(datapaths, &od->key_node);
355 destroy_tnlids(&od->port_tnlids);
8639f9be 356 destroy_ipam(&od->ipam);
86e98048 357 free(od->router_ports);
5868eb24
BP
358 free(od);
359 }
360}
361
9a9961d2
BP
362/* Returns 'od''s datapath type. */
363static enum ovn_datapath_type
364ovn_datapath_get_type(const struct ovn_datapath *od)
365{
366 return od->nbs ? DP_SWITCH : DP_ROUTER;
367}
368
5868eb24
BP
369static struct ovn_datapath *
370ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
371{
372 struct ovn_datapath *od;
373
374 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
375 if (uuid_equals(uuid, &od->key)) {
376 return od;
377 }
378 }
379 return NULL;
380}
381
382static struct ovn_datapath *
383ovn_datapath_from_sbrec(struct hmap *datapaths,
384 const struct sbrec_datapath_binding *sb)
385{
386 struct uuid key;
387
9975d7be
BP
388 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
389 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
390 return NULL;
391 }
392 return ovn_datapath_find(datapaths, &key);
393}
394
5412db30
J
395static bool
396lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
397{
398 return !lrouter->enabled || *lrouter->enabled;
399}
400
5868eb24
BP
/* Pairs up southbound Datapath_Binding rows with the northbound logical
 * switches and routers, initializing 'datapaths' with one ovn_datapath per
 * entity and partitioning them into three lists:
 *
 *   - 'sb_only': datapaths with a southbound row but no northbound match.
 *   - 'nb_only': northbound switches/routers without a southbound row yet.
 *   - 'both':    datapaths present on both sides.
 *
 * Also deletes southbound rows that lack a parseable northbound UUID or that
 * duplicate another row's UUID.  Disabled logical routers are skipped
 * entirely. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every valid southbound row; everything starts out as
     * sb_only until a northbound match is found. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: match northbound logical switches against the index; matched
     * entries migrate from sb_only to both, unmatched ones become nb_only. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }

    /* Pass 3: same for northbound logical routers. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and router sharing a UUID would
                 * mean duplicate UUIDs within OVN_Northbound. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
484
/* Allocates a fresh datapath tunnel key.  Datapath keys are 24 bits wide,
 * so the valid range is [1, 2**24 - 1].  Returns 0 on exhaustion. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t last_key;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &last_key);
}
491
0bac7164
BP
492/* Updates the southbound Datapath_Binding table so that it contains the
493 * logical switches and routers specified by the northbound database.
494 *
495 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
496 * switch and router. */
5868eb24
BP
497static void
498build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
499{
500 struct ovs_list sb_only, nb_only, both;
501
502 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
503
417e7e66 504 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
505 /* First index the in-use datapath tunnel IDs. */
506 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
507 struct ovn_datapath *od;
508 LIST_FOR_EACH (od, list, &both) {
509 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
510 }
511
512 /* Add southbound record for each unmatched northbound record. */
513 LIST_FOR_EACH (od, list, &nb_only) {
514 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
515 if (!tunnel_key) {
516 break;
517 }
518
519 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
520
5868eb24 521 char uuid_s[UUID_LEN + 1];
9975d7be
BP
522 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
523 const char *key = od->nbs ? "logical-switch" : "logical-router";
524 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
aaf881c6 525 sbrec_datapath_binding_set_external_ids(od->sb, &id);
5868eb24
BP
526
527 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
528 }
529 destroy_tnlids(&dp_tnlids);
530 }
531
532 /* Delete southbound records without northbound matches. */
533 struct ovn_datapath *od, *next;
534 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 535 ovs_list_remove(&od->list);
5868eb24
BP
536 sbrec_datapath_binding_delete(od->sb);
537 ovn_datapath_destroy(datapaths, od);
538 }
539}
540\f
/* A logical switch port or logical router port, joining the northbound port
 * row with its southbound Port_Binding. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S as its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
574
575static struct ovn_port *
576ovn_port_create(struct hmap *ports, const char *key,
0ee00741
HK
577 const struct nbrec_logical_switch_port *nbsp,
578 const struct nbrec_logical_router_port *nbrp,
5868eb24
BP
579 const struct sbrec_port_binding *sb)
580{
581 struct ovn_port *op = xzalloc(sizeof *op);
9975d7be
BP
582
583 struct ds json_key = DS_EMPTY_INITIALIZER;
584 json_string_escape(key, &json_key);
585 op->json_key = ds_steal_cstr(&json_key);
586
587 op->key = xstrdup(key);
5868eb24 588 op->sb = sb;
0ee00741
HK
589 op->nbsp = nbsp;
590 op->nbrp = nbrp;
5868eb24
BP
591 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
592 return op;
593}
594
595static void
596ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
597{
598 if (port) {
599 /* Don't remove port->list. It is used within build_ports() as a
600 * private list and once we've exited that function it is not safe to
601 * use it. */
602 hmap_remove(ports, &port->key_node);
e93b43d6
JP
603
604 for (int i = 0; i < port->n_lsp_addrs; i++) {
605 destroy_lport_addresses(&port->lsp_addrs[i]);
606 }
607 free(port->lsp_addrs);
608
609 for (int i = 0; i < port->n_ps_addrs; i++) {
610 destroy_lport_addresses(&port->ps_addrs[i]);
611 }
612 free(port->ps_addrs);
613
4685e523 614 destroy_lport_addresses(&port->lrp_networks);
9975d7be
BP
615 free(port->json_key);
616 free(port->key);
5868eb24
BP
617 free(port);
618 }
619}
620
621static struct ovn_port *
622ovn_port_find(struct hmap *ports, const char *name)
623{
624 struct ovn_port *op;
625
626 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
627 if (!strcmp(op->key, name)) {
628 return op;
629 }
630 }
631 return NULL;
632}
633
634static uint32_t
635ovn_port_allocate_key(struct ovn_datapath *od)
636{
637 return allocate_tnlid(&od->port_tnlids, "port",
638 (1u << 15) - 1, &od->port_key_hint);
639}
640
8639f9be
ND
641static bool
642ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
643{
644 struct macam_node *macam_node;
645 HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
646 &macam) {
647 if (eth_addr_equals(*ea, macam_node->mac_addr)) {
648 if (warn) {
649 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
650 VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
651 ETH_ADDR_ARGS(macam_node->mac_addr));
652 }
653 return true;
654 }
655 }
656 return false;
657}
658
659static bool
660ipam_is_duplicate_ip(struct ovn_datapath *od, uint32_t ip, bool warn)
661{
662 struct ipam_node *ipam_node;
663 HMAP_FOR_EACH_WITH_HASH (ipam_node, hmap_node, hash_int(ip, 0),
664 &od->ipam) {
665 if (ipam_node->ip_addr == ip) {
666 if (warn) {
667 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
668 VLOG_WARN_RL(&rl, "Duplicate IP set: "IP_FMT,
669 IP_ARGS(htonl(ip)));
670 }
671 return true;
672 }
673 }
674 return false;
675}
676
677static void
678ipam_insert_mac(struct eth_addr *ea, bool check)
679{
680 if (!ea) {
681 return;
682 }
683
684 uint64_t mac64 = eth_addr_to_uint64(*ea);
685 /* If the new MAC was not assigned by this address management system or
686 * check is true and the new MAC is a duplicate, do not insert it into the
687 * macam hmap. */
688 if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
689 || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
690 return;
691 }
692
693 struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
694 new_macam_node->mac_addr = *ea;
695 hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
696}
697
698static void
699ipam_insert_ip(struct ovn_datapath *od, uint32_t ip, bool check)
700{
701 if (!od) {
702 return;
703 }
704
705 if (check && ipam_is_duplicate_ip(od, ip, true)) {
706 return;
707 }
708
709 struct ipam_node *new_ipam_node = xmalloc(sizeof *new_ipam_node);
710 new_ipam_node->ip_addr = ip;
711 hmap_insert(&od->ipam, &new_ipam_node->hmap_node, hash_int(ip, 0));
712}
713
/* Records the MAC (and, when the switch's other_config:subnet is set, the
 * IPv4 addresses) from one entry of a logical switch port's "addresses"
 * column into the macam/ipam tables.  "unknown" and "dynamic" entries, and
 * any null argument, are ignored. */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || !strcmp(address, "dynamic")) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!smap_get(&od->nbs->other_config, "subnet")) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip, true);
    }

    destroy_lport_addresses(&laddrs);
}
745
/* Registers all of 'op''s existing addresses with the macam/ipam tables so
 * that future dynamic allocation will not reuse them.
 *
 * For a switch port, this covers every entry of "addresses" plus any
 * previously recorded dynamic_addresses.  For a router port, the MAC is
 * always registered; its IPv4 addresses are registered in the peer switch's
 * ipam table, but only if the peer switch has other_config:subnet set. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Router port IPs only matter to ipam if the attached switch does
         * dynamic allocation (i.e. has a "subnet"). */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip, true);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
785
786static uint64_t
787ipam_get_unused_mac(void)
788{
789 /* Stores the suffix of the most recently ipam-allocated MAC address. */
790 static uint32_t last_mac;
791
792 uint64_t mac64;
793 struct eth_addr mac;
794 uint32_t mac_addr_suffix, i;
795 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
796 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
797 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
798 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
799 eth_addr_from_uint64(mac64, &mac);
800 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
801 last_mac = mac_addr_suffix;
802 break;
803 }
804 }
805
806 if (i == MAC_ADDR_SPACE) {
807 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
808 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
809 mac64 = 0;
810 }
811
812 return mac64;
813}
814
815static uint32_t
816ipam_get_unused_ip(struct ovn_datapath *od, uint32_t subnet, uint32_t mask)
817{
818 if (!od) {
819 return 0;
820 }
821
822 uint32_t ip = 0;
823
824 /* Find an unused IP address in subnet. x.x.x.1 is reserved for a
825 * logical router port. */
826 for (uint32_t i = 2; i < ~mask; i++) {
827 uint32_t tentative_ip = subnet + i;
828 if (!ipam_is_duplicate_ip(od, tentative_ip, false)) {
829 ip = tentative_ip;
830 break;
831 }
832 }
833
834 if (!ip) {
835 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
836 VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
837 }
838
839 return ip;
840}
841
/* Allocates a fresh MAC and IPv4 address for switch port 'op' within
 * 'subnet'/'mask' (network byte order), records both in the macam/ipam
 * tables, and writes them into the port's dynamic_addresses column as
 * "xx:xx:xx:xx:xx:xx a.b.c.d".  Returns true on success, false if either
 * address space is exhausted or the arguments do not name a switch port. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        ovs_be32 subnet, ovs_be32 mask)
{
    if (!od || !op || !op->nbsp) {
        return false;
    }

    uint32_t ip = ipam_get_unused_ip(od, ntohl(subnet), ntohl(mask));
    if (!ip) {
        return false;
    }

    struct eth_addr mac;
    uint64_t mac64 = ipam_get_unused_mac();
    if (!mac64) {
        return false;
    }
    eth_addr_from_uint64(mac64, &mac);

    /* Add MAC/IP to MACAM/IPAM hmaps if both addresses were allocated
     * successfully. */
    ipam_insert_ip(od, ip, false);
    ipam_insert_mac(&mac, false);

    char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT,
                               ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip)));
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr);
    free(new_addr);

    return true;
}
874
/* IPAM generally stands for IP address management.  In non-virtualized
 * world, MAC addresses come with the hardware.  But, with virtualized
 * workloads, they need to be assigned and managed.  This function
 * does both IP address management (ipam) and MAC address management
 * (macam): for every switch with other_config:subnet set, it allocates an
 * address pair for each port whose "addresses" column requests "dynamic"
 * and that has no dynamic_addresses yet. */
static void
build_ipam(struct northd_context *ctx, struct hmap *datapaths,
           struct hmap *ports)
{
    /* Allocation writes the northbound database, so there is nothing to do
     * without a northbound transaction. */
    if (!ctx->ovnnb_txn) {
        return;
    }

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            const char *subnet_str = smap_get(&od->nbs->other_config,
                                              "subnet");
            if (!subnet_str) {
                continue;
            }

            ovs_be32 subnet, mask;
            char *error = ip_parse_masked(subnet_str, &subnet, &mask);
            if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
                static struct vlog_rate_limit rl
                    = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
                free(error);
                continue;
            }

            struct ovn_port *op;
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp =
                    od->nbs->ports[i];

                if (!nbsp) {
                    continue;
                }

                op = ovn_port_find(ports, nbsp->name);
                if (!op || (op->nbsp && op->peer)) {
                    /* Do not allocate addresses for logical switch ports that
                     * have a peer. */
                    continue;
                }

                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "dynamic")
                        && !nbsp->dynamic_addresses) {
                        /* NOTE(review): on success, extract_lsp_addresses()
                         * is fed nbsp->dynamic_addresses, which
                         * ipam_allocate_addresses() just set — this assumes
                         * the IDL reflects the new value immediately;
                         * confirm. */
                        if (!ipam_allocate_addresses(od, op, subnet, mask)
                            || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                            static struct vlog_rate_limit rl
                                = VLOG_RATE_LIMIT_INIT(1, 1);
                            VLOG_INFO_RL(&rl, "Failed to allocate address.");
                        } else {
                            op->n_lsp_addrs++;
                        }
                        break;
                    }
                }
            }
        }
    }
}
945\f
946
5868eb24
BP
/* Joins the southbound Port_Binding table with the logical ports declared in
 * the northbound database.  Initializes 'ports' as an hmap of "struct
 * ovn_port" keyed by logical port name, and partitions every port into
 * exactly one of three lists:
 *
 *   - 'sb_only': ports with a SB Port_Binding row but no NB counterpart
 *     (stale rows the caller should delete).
 *   - 'nb_only': NB logical switch/router ports with no SB row yet
 *     (the caller should insert SB rows for them).
 *   - 'both': ports present in both databases.
 *
 * Also parses each switch port's addresses/port_security into lsp_addrs /
 * ps_addrs, each router port's networks into lrp_networks, and wires up
 * 'peer' pointers between router ports and switch ports of type "router". */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct ovs_list *sb_only, struct ovs_list *nb_only,
                   struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Seed 'ports' with every SB binding; anything the NB pass below does
     * not claim stays on 'sb_only'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: walk its NB switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        /* Name already claimed by another NB port row. */
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse "addresses"; "unknown" and unparseable entries are
                 * skipped, so n_lsp_addrs may end up < n_addresses. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")) {
                        continue;
                    }
                    if (!strcmp(nbsp->addresses[j], "dynamic")) {
                        /* "dynamic" resolves through the separately stored
                         * dynamic_addresses column, if it has been filled
                         * in; otherwise there is nothing to parse yet. */
                        if (nbsp->dynamic_addresses) {
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                               &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse "port_security"; invalid entries are skipped. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
            }
        } else {
            /* Logical router: walk its NB router ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A router port with no configured networks is ignored. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
            /* Switch port of type "router": its peer is named by
             * options:router-port. */
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            /* Track this switch port in its datapath's list of attachments
             * to routers. */
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;
        } else if (op->nbrp && op->nbrp->peer) {
            /* Router port with an explicit 'peer' column: valid only when
             * the peer is another router port. */
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1132
/* Refreshes the southbound Port_Binding row for 'op' (op->sb) from its
 * northbound configuration: datapath, type, options, parent_port, tag and
 * mac columns.  'op' must have either an NB router port (op->nbrp) or an NB
 * switch port (op->nbsp) attached. */
static void
ovn_port_update_sbrec(const struct ovn_port *op)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "l3gateway". */
        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
        if (chassis) {
            sbrec_port_binding_set_type(op->sb, "l3gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        /* "<error>" flags a router port whose peer was never resolved by
         * join_logical_ports(). */
        const char *peer = op->peer ? op->peer->key : "<error>";
        struct smap new;
        smap_init(&new);
        smap_add(&new, "peer", peer);
        if (chassis) {
            smap_add(&new, "l3gateway-chassis", chassis);
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Router ports never have a container parent or VLAN tag, and the
         * mac column is unused for them. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary switch port: copy type and options through. */
            sbrec_port_binding_set_type(op->sb, op->nbsp->type);
            sbrec_port_binding_set_options(op->sb, &op->nbsp->options);
        } else {
            /* Switch port of type "router": mirror the peer router's
             * gateway configuration, if any. */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "l3gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "l3gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get_def(&op->nbsp->options,
                                                   "router-port", "<error>");
            struct smap new;
            smap_init(&new);
            smap_add(&new, "peer", router_port);
            if (chassis) {
                smap_add(&new, "l3gateway-chassis", chassis);
            }
            sbrec_port_binding_set_options(op->sb, &new);
            smap_destroy(&new);
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);
    }
}
1195
0bac7164 1196/* Updates the southbound Port_Binding table so that it contains the logical
80f408f4 1197 * switch ports specified by the northbound database.
0bac7164
BP
1198 *
1199 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
1200 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
1201 * datapaths. */
5868eb24
BP
1202static void
1203build_ports(struct northd_context *ctx, struct hmap *datapaths,
1204 struct hmap *ports)
1205{
1206 struct ovs_list sb_only, nb_only, both;
1207
1208 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
1209
1210 /* For logical ports that are in both databases, update the southbound
1211 * record based on northbound data. Also index the in-use tunnel_keys. */
1212 struct ovn_port *op, *next;
1213 LIST_FOR_EACH_SAFE (op, next, list, &both) {
1214 ovn_port_update_sbrec(op);
1215
1216 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
1217 if (op->sb->tunnel_key > op->od->port_key_hint) {
1218 op->od->port_key_hint = op->sb->tunnel_key;
1219 }
1220 }
1221
1222 /* Add southbound record for each unmatched northbound record. */
1223 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
1224 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
1225 if (!tunnel_key) {
1226 continue;
1227 }
1228
1229 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
1230 ovn_port_update_sbrec(op);
1231
1232 sbrec_port_binding_set_logical_port(op->sb, op->key);
1233 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
1234 }
1235
1236 /* Delete southbound records without northbound matches. */
1237 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 1238 ovs_list_remove(&op->list);
5868eb24
BP
1239 sbrec_port_binding_delete(op->sb);
1240 ovn_port_destroy(ports, op);
1241 }
1242}
1243\f
/* Tunnel keys in this range are reserved for multicast groups in the
 * southbound Multicast_Group table. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group and its fixed southbound tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Flood group: all enabled ports on a datapath.
 * (Use the named bound instead of repeating the magic number.) */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood =
    { MC_FLOOD, OVN_MAX_MULTICAST };

/* Group of ports that accept packets to unknown MACs. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown =
    { MC_UNKNOWN, OVN_MAX_MULTICAST - 1 };
1257
1258static bool
1259multicast_group_equal(const struct multicast_group *a,
1260 const struct multicast_group *b)
1261{
1262 return !strcmp(a->name, b->name) && a->key == b->key;
1263}
1264
/* Multicast group entry: the set of ports on one datapath that belong to a
 * given multicast group.  Becomes one row of the southbound Multicast_Group
 * table (see ovn_multicast_update_sbrec()). */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    struct ovn_port **ports;    /* Member ports; grown on demand by
                                 * ovn_multicast_add(). */
    size_t n_ports, allocated_ports;
};
1274
1275static uint32_t
1276ovn_multicast_hash(const struct ovn_datapath *datapath,
1277 const struct multicast_group *group)
1278{
1279 return hash_pointer(datapath, group->key);
1280}
1281
1282static struct ovn_multicast *
1283ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
1284 const struct multicast_group *group)
1285{
1286 struct ovn_multicast *mc;
1287
1288 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
1289 ovn_multicast_hash(datapath, group), mcgroups) {
1290 if (mc->datapath == datapath
1291 && multicast_group_equal(mc->group, group)) {
1292 return mc;
4edcdcf4
RB
1293 }
1294 }
5868eb24
BP
1295 return NULL;
1296}
1297
1298static void
1299ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
1300 struct ovn_port *port)
1301{
1302 struct ovn_datapath *od = port->od;
1303 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
1304 if (!mc) {
1305 mc = xmalloc(sizeof *mc);
1306 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
1307 mc->datapath = od;
1308 mc->group = group;
1309 mc->n_ports = 0;
1310 mc->allocated_ports = 4;
1311 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
1312 }
1313 if (mc->n_ports >= mc->allocated_ports) {
1314 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
1315 sizeof *mc->ports);
1316 }
1317 mc->ports[mc->n_ports++] = port;
1318}
4edcdcf4 1319
5868eb24
BP
1320static void
1321ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
1322{
1323 if (mc) {
1324 hmap_remove(mcgroups, &mc->hmap_node);
1325 free(mc->ports);
1326 free(mc);
1327 }
1328}
4edcdcf4 1329
5868eb24
BP
1330static void
1331ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
1332 const struct sbrec_multicast_group *sb)
1333{
1334 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
1335 for (size_t i = 0; i < mc->n_ports; i++) {
1336 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
1337 }
1338 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
1339 free(ports);
4edcdcf4 1340}
bd39395f 1341\f
48605550 1342/* Logical flow generation.
bd39395f 1343 *
48605550 1344 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
1345 * function of most of the northbound database.
1346 */
1347
5868eb24
BP
/* A logical flow: one prospective row of the southbound Logical_Flow table,
 * identified by (datapath, stage, priority, match, actions). */
struct ovn_lflow {
    struct hmap_node hmap_node; /* In the lflow map, by ovn_lflow_hash(). */

    struct ovn_datapath *od;    /* Datapath the flow applies to. */
    enum ovn_stage stage;       /* Pipeline stage. */
    uint16_t priority;          /* Priority within the stage. */
    char *match;                /* Match expression; freed by
                                 * ovn_lflow_destroy(). */
    char *actions;              /* Actions; freed by ovn_lflow_destroy(). */
};
1357
1358static size_t
5868eb24 1359ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 1360{
5868eb24 1361 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 1362 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
1363 hash = hash_string(lflow->match, hash);
1364 return hash_string(lflow->actions, hash);
bd39395f
BP
1365}
1366
5868eb24
BP
1367static bool
1368ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1369{
1370 return (a->od == b->od
880fcd14 1371 && a->stage == b->stage
5868eb24
BP
1372 && a->priority == b->priority
1373 && !strcmp(a->match, b->match)
1374 && !strcmp(a->actions, b->actions));
1375}
1376
1377static void
1378ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
880fcd14 1379 enum ovn_stage stage, uint16_t priority,
5868eb24 1380 char *match, char *actions)
bd39395f 1381{
5868eb24 1382 lflow->od = od;
880fcd14 1383 lflow->stage = stage;
5868eb24
BP
1384 lflow->priority = priority;
1385 lflow->match = match;
1386 lflow->actions = actions;
bd39395f
BP
1387}
1388
48605550 1389/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 1390static void
5868eb24 1391ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
880fcd14 1392 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1393 const char *match, const char *actions)
1394{
9a9961d2
BP
1395 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
1396
5868eb24 1397 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 1398 ovn_lflow_init(lflow, od, stage, priority,
5868eb24
BP
1399 xstrdup(match), xstrdup(actions));
1400 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
1401}
1402
1403static struct ovn_lflow *
1404ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 1405 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1406 const char *match, const char *actions)
1407{
1408 struct ovn_lflow target;
880fcd14 1409 ovn_lflow_init(&target, od, stage, priority,
5868eb24
BP
1410 CONST_CAST(char *, match), CONST_CAST(char *, actions));
1411
1412 struct ovn_lflow *lflow;
1413 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
1414 lflows) {
1415 if (ovn_lflow_equal(lflow, &target)) {
1416 return lflow;
bd39395f
BP
1417 }
1418 }
5868eb24
BP
1419 return NULL;
1420}
bd39395f 1421
5868eb24
BP
1422static void
1423ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1424{
1425 if (lflow) {
1426 hmap_remove(lflows, &lflow->hmap_node);
1427 free(lflow->match);
1428 free(lflow->actions);
1429 free(lflow);
1430 }
bd39395f
BP
1431}
1432
bd39395f 1433/* Appends port security constraints on L2 address field 'eth_addr_field'
e93b43d6
JP
1434 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
1435 * elements, is the collection of port_security constraints from an
1436 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
bd39395f 1437static void
685f4dfe 1438build_port_security_l2(const char *eth_addr_field,
e93b43d6
JP
1439 struct lport_addresses *ps_addrs,
1440 unsigned int n_ps_addrs,
685f4dfe 1441 struct ds *match)
bd39395f 1442{
e93b43d6
JP
1443 if (!n_ps_addrs) {
1444 return;
1445 }
bd39395f 1446
e93b43d6 1447 ds_put_format(match, " && %s == {", eth_addr_field);
f7cb14cd 1448
e93b43d6
JP
1449 for (size_t i = 0; i < n_ps_addrs; i++) {
1450 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
bd39395f 1451 }
f7cb14cd 1452 ds_chomp(match, ' ');
bd39395f 1453 ds_put_cstr(match, "}");
bd39395f
BP
1454}
1455
685f4dfe
NS
/* Appends an IPv6 ND port-security clause to 'match': the packet must be an
 * ND packet whose nd.sll or nd.tll is either all-zeros or 'ea', and, when
 * 'n_ipv6_addrs' > 0, whose nd.target is the EUI-64 link-local address
 * derived from 'ea' or one of 'ipv6_addrs'. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No IPv6 addresses configured: close the open parentheses and
         * skip the nd.target restriction. */
        ds_put_cstr(match, "))");
        return;
    }

    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    /* The link-local address derived from 'ea' is always acceptable as an
     * ND target. */
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
1486
/* Appends an IPv6 address port-security clause to 'match': restricts
 * ip6.src (ingress) or ip6.dst (egress) to the link-local address derived
 * from 'ea' plus 'ipv6_addrs'; egress additionally allows the ff00::/8
 * multicast range. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
1516
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A MAC-only port-security entry constrains ARP and ND for both
         * address families. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            /* ARP allow flow: eth.src and arp.sha must match the PS MAC. */
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Replace the trailing ", " with "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            /* IPv6 ND allow flow. */
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port not allowed above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
1596
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has IPv4 addresses,
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *
 *   - If the port security has IPv6 addresses,
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both
 *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* Pick the direction-specific field name and stage. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only entries impose no L3 constraint. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress additionally allows limited broadcast and IPv4
                 * multicast destinations. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address. If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Catch-all: drop any other IP traffic for this MAC. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }

}
1723
95a9a275 1724static bool
80f408f4 1725lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 1726{
80f408f4 1727 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
1728}
1729
4c7bf534 1730static bool
80f408f4 1731lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 1732{
80f408f4 1733 return !lsp->up || *lsp->up;
4c7bf534
NS
1734}
1735
281977f7
NS
/* Builds the logical-flow action strings for answering DHCPv4 requests on
 * 'op' with 'offer_ip'.  On success, appends a put_dhcp_opts() action to
 * 'options_action' and the packet-rewrite/send action to 'response_action'
 * and returns true.  Returns false (appending nothing) if DHCPv4 is not
 * configured for the port, the configured cidr is invalid or does not
 * contain 'offer_ip', or a required option is missing. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
       /* Either
        *  - cidr defined is invalid or
        *  - the offer ip of the logical port doesn't belong to the cidr
        *    defined in the DHCPv4 options.
        *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");
    const char *router = smap_get(
        &op->nbsp->dhcpv4_options->options, "router");

    if (!(server_ip && server_mac && lease_time && router)) {
        /* "server_id", "server_mac", "lease_time" and "router" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    /* Derive the "netmask" option from the cidr's mask. */
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    /* Emit: REGBIT = put_dhcp_opts(offerip = ..., key = value, ...); next; */
    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));
    struct smap_node *node;
    SMAP_FOR_EACH(node, &dhcpv4_options) {
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }

    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    /* Turn the request into a reply and send it back out the ingress port. */
    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    smap_destroy(&dhcpv4_options);
    return true;
}
1806
33ac3c83
NS
/* Builds the logical-flow action strings for answering DHCPv6 requests on
 * 'op' with 'offer_ip'.  On success, appends a put_dhcpv6_opts() action to
 * 'options_action' and the packet-rewrite/send action to 'response_action'
 * and returns true.  Returns false if DHCPv6 is not configured for the port,
 * the cidr is invalid or does not contain 'offer_ip', or "server_id" is
 * missing or not a MAC address. */
static bool
build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
                    struct ds *options_action, struct ds *response_action)
{
    if (!op->nbsp->dhcpv6_options) {
        /* CMS has disabled native DHCPv6 for this lport. */
        return false;
    }

    struct in6_addr host_ip, mask;

    char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
                                    &mask);
    if (error) {
        free(error);
        return false;
    }
    /* (offer_ip XOR host_ip) AND mask is nonzero iff the two addresses
     * differ inside the masked prefix. */
    struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
    ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
    if (!ipv6_mask_is_any(&ip6_mask)) {
        /* offer_ip doesn't belong to the cidr defined in lport's DHCPv6
         * options. */
        return false;
    }

    /* "server_id" should be the MAC address. */
    const char *server_mac = smap_get(&op->nbsp->dhcpv6_options->options,
                                      "server_id");
    struct eth_addr ea;
    if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
        /* "server_id" should be present in the dhcpv6_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
                          " for lport %s", op->json_key);
        return false;
    }

    /* Get the link local IP of the DHCPv6 server from the server MAC. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);

    char server_ip[INET6_ADDRSTRLEN + 1];
    ipv6_string_mapped(server_ip, &lla);

    char ia_addr[INET6_ADDRSTRLEN + 1];
    ipv6_string_mapped(ia_addr, offer_ip);

    /* Emit: REGBIT = put_dhcpv6_opts(ia_addr = ..., key = value, ...);
     * next; */
    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(ia_addr = %s, ",
                  ia_addr);
    struct smap_node *node;
    SMAP_FOR_EACH (node, &op->nbsp->dhcpv6_options->options) {
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    /* Turn the request into a reply and send it back out the ingress port. */
    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
                  "udp.dst = 546; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, server_ip);
    return true;
}
1872
78aab811
JP
1873static bool
1874has_stateful_acl(struct ovn_datapath *od)
1875{
9975d7be
BP
1876 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1877 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
1878 if (!strcmp(acl->action, "allow-related")) {
1879 return true;
1880 }
1881 }
1882
1883 return false;
1884}
1885
1886static void
2d018f9b
GS
1887build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
1888 struct hmap *ports)
78aab811
JP
1889{
1890 bool has_stateful = has_stateful_acl(od);
48fcdb47 1891 struct ovn_port *op;
78aab811
JP
1892
1893 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1894 * allowed by default. */
880fcd14
BP
1895 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1896 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
78aab811 1897
78aab811
JP
1898 /* If there are any stateful ACL rules in this dapapath, we must
1899 * send all IP packets through the conntrack action, which handles
1900 * defragmentation, in order to match L4 headers. */
1901 if (has_stateful) {
48fcdb47 1902 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 1903 if (op->od == od && !strcmp(op->nbsp->type, "router")) {
501f95e1
JP
1904 /* Can't use ct() for router ports. Consider the
1905 * following configuration: lp1(10.0.0.2) on
1906 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
1907 * ping from lp1 to lp2, First, the response will go
1908 * through ct() with a zone for lp2 in the ls2 ingress
1909 * pipeline on hostB. That ct zone knows about this
1910 * connection. Next, it goes through ct() with the zone
1911 * for the router port in the egress pipeline of ls2 on
1912 * hostB. This zone does not know about the connection,
1913 * as the icmp request went through the logical router
1914 * on hostA, not hostB. This would only work with
1915 * distributed conntrack state across all chassis. */
1916 struct ds match_in = DS_EMPTY_INITIALIZER;
1917 struct ds match_out = DS_EMPTY_INITIALIZER;
1918
48fcdb47
WL
1919 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1920 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
501f95e1
JP
1921 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
1922 ds_cstr(&match_in), "next;");
1923 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
1924 ds_cstr(&match_out), "next;");
48fcdb47
WL
1925
1926 ds_destroy(&match_in);
1927 ds_destroy(&match_out);
1928 }
1929 }
2d018f9b
GS
1930 /* Ingress and Egress Pre-ACL Table (Priority 110).
1931 *
1932 * Not to do conntrack on ND packets. */
1933 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
1934 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
48fcdb47 1935
78aab811
JP
1936 /* Ingress and Egress Pre-ACL Table (Priority 100).
1937 *
1938 * Regardless of whether the ACL is "from-lport" or "to-lport",
1939 * we need rules in both the ingress and egress table, because
facf8652
GS
1940 * the return traffic needs to be followed.
1941 *
1942 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
1943 * it to conntrack for tracking and defragmentation. */
1944 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
1945 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1946 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
1947 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2d018f9b
GS
1948 }
1949}
78aab811 1950
7a15be69
GS
1951/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
1952 * 'ip_address'. The caller must free() the memory allocated for
1953 * 'ip_address'. */
1954static void
1955ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1956 uint16_t *port)
1957{
1958 char *ip_str, *start, *next;
1959 *ip_address = NULL;
1960 *port = 0;
1961
1962 next = start = xstrdup(key);
1963 ip_str = strsep(&next, ":");
1964 if (!ip_str || !ip_str[0]) {
1965 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1966 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1967 free(start);
1968 return;
1969 }
1970
1971 ovs_be32 ip, mask;
1972 char *error = ip_parse_masked(ip_str, &ip, &mask);
1973 if (error || mask != OVS_BE32_MAX) {
1974 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1975 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1976 free(start);
1977 free(error);
1978 return;
1979 }
1980
1981 int l4_port = 0;
1982 if (next && next[0]) {
1983 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
1984 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1985 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
1986 free(start);
1987 return;
1988 }
1989 }
1990
1991 *port = l4_port;
1992 *ip_address = strdup(ip_str);
1993 free(start);
1994}
1995
1996static void
1997build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
1998{
1999 /* Allow all packets to go to next tables by default. */
2000 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2001 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2002
2003 struct sset all_ips = SSET_INITIALIZER(&all_ips);
61591ad9
GS
2004 bool vip_configured = false;
2005 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2006 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
2007 struct smap *vips = &lb->vips;
2008 struct smap_node *node;
7a15be69
GS
2009
2010 SMAP_FOR_EACH (node, vips) {
2011 vip_configured = true;
2012
2013 /* node->key contains IP:port or just IP. */
2014 char *ip_address = NULL;
2015 uint16_t port;
2016 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2017 if (!ip_address) {
2018 continue;
2019 }
2020
2021 if (!sset_contains(&all_ips, ip_address)) {
2022 sset_add(&all_ips, ip_address);
2023 }
2024
2025 free(ip_address);
2026
2027 /* Ignore L4 port information in the key because fragmented packets
2028 * may not have L4 information. The pre-stateful table will send
2029 * the packet through ct() action to de-fragment. In stateful
2030 * table, we will eventually look at L4 information. */
2031 }
61591ad9 2032 }
7a15be69 2033
61591ad9
GS
2034 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2035 * packet to conntrack for defragmentation. */
2036 const char *ip_address;
2037 SSET_FOR_EACH(ip_address, &all_ips) {
2038 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
2039 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
2040 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2041 free(match);
2042 }
7a15be69 2043
61591ad9 2044 sset_destroy(&all_ips);
7a15be69 2045
61591ad9
GS
2046 if (vip_configured) {
2047 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
2048 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
7a15be69
GS
2049 }
2050}
2051
facf8652
GS
2052static void
2053build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
2054{
2055 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
2056 * allowed by default. */
2057 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
2058 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
2059
2060 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
2061 * sent to conntrack for tracking and defragmentation. */
2062 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
2063 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2064 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
2065 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2066}
2067
2d018f9b
GS
2068static void
2069build_acls(struct ovn_datapath *od, struct hmap *lflows)
2070{
2071 bool has_stateful = has_stateful_acl(od);
e75451fe 2072
2d018f9b
GS
2073 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
2074 * default. A related rule at priority 1 is added below if there
2075 * are any stateful ACLs in this datapath. */
2076 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
2077 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
2078
2079 if (has_stateful) {
78aab811
JP
2080 /* Ingress and Egress ACL Table (Priority 1).
2081 *
2082 * By default, traffic is allowed. This is partially handled by
2083 * the Priority 0 ACL flows added earlier, but we also need to
2084 * commit IP flows. This is because, while the initiater's
2085 * direction may not have any stateful rules, the server's may
2086 * and then its return traffic would not have an associated
cc58e1f2
RB
2087 * conntrack entry and would return "+invalid".
2088 *
2089 * We use "ct_commit" for a connection that is not already known
2090 * by the connection tracker. Once a connection is committed,
2091 * subsequent packets will hit the flow at priority 0 that just
2092 * uses "next;"
2093 *
2094 * We also check for established connections that have ct_label[0]
2095 * set on them. That's a connection that was disallowed, but is
2096 * now allowed by policy again since it hit this default-allow flow.
2097 * We need to set ct_label[0]=0 to let the connection continue,
2098 * which will be done by ct_commit() in the "stateful" stage.
2099 * Subsequent packets will hit the flow at priority 0 that just
2100 * uses "next;". */
2101 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
2102 "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
2103 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2104 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
2105 "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
2106 REGBIT_CONNTRACK_COMMIT" = 1; next;");
78aab811
JP
2107
2108 /* Ingress and Egress ACL Table (Priority 65535).
2109 *
cc58e1f2
RB
2110 * Always drop traffic that's in an invalid state. Also drop
2111 * reply direction packets for connections that have been marked
2112 * for deletion (bit 0 of ct_label is set).
2113 *
2114 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 2115 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
2116 "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
2117 "drop;");
880fcd14 2118 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
2119 "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
2120 "drop;");
78aab811
JP
2121
2122 /* Ingress and Egress ACL Table (Priority 65535).
2123 *
cc58e1f2
RB
2124 * Allow reply traffic that is part of an established
2125 * conntrack entry that has not been marked for deletion
2126 * (bit 0 of ct_label). We only match traffic in the
2127 * reply direction because we want traffic in the request
2128 * direction to hit the currently defined policy from ACLs.
2129 *
2130 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 2131 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
2132 "ct.est && !ct.rel && !ct.new && !ct.inv "
2133 "&& ct.rpl && ct_label[0] == 0",
78aab811 2134 "next;");
880fcd14 2135 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
2136 "ct.est && !ct.rel && !ct.new && !ct.inv "
2137 "&& ct.rpl && ct_label[0] == 0",
78aab811
JP
2138 "next;");
2139
2140 /* Ingress and Egress ACL Table (Priority 65535).
2141 *
cc58e1f2
RB
2142 * Allow traffic that is related to an existing conntrack entry that
2143 * has not been marked for deletion (bit 0 of ct_label).
2144 *
2145 * This is enforced at a higher priority than ACLs can be defined.
78aab811
JP
2146 *
2147 * NOTE: This does not support related data sessions (eg,
2148 * a dynamically negotiated FTP data channel), but will allow
2149 * related traffic such as an ICMP Port Unreachable through
2150 * that's generated from a non-listening UDP port. */
880fcd14 2151 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
2152 "!ct.est && ct.rel && !ct.new && !ct.inv "
2153 "&& ct_label[0] == 0",
78aab811 2154 "next;");
880fcd14 2155 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
2156 "!ct.est && ct.rel && !ct.new && !ct.inv "
2157 "&& ct_label[0] == 0",
78aab811 2158 "next;");
e75451fe
ZKL
2159
2160 /* Ingress and Egress ACL Table (Priority 65535).
2161 *
2162 * Not to do conntrack on ND packets. */
2163 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
2164 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
78aab811
JP
2165 }
2166
2167 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
2168 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2169 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 2170 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 2171 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811 2172
cc58e1f2
RB
2173 if (!strcmp(acl->action, "allow")
2174 || !strcmp(acl->action, "allow-related")) {
78aab811
JP
2175 /* If there are any stateful flows, we must even commit "allow"
2176 * actions. This is because, while the initiater's
2177 * direction may not have any stateful rules, the server's
2178 * may and then its return traffic would not have an
2179 * associated conntrack entry and would return "+invalid". */
cc58e1f2
RB
2180 if (!has_stateful) {
2181 ovn_lflow_add(lflows, od, stage,
2182 acl->priority + OVN_ACL_PRI_OFFSET,
2183 acl->match, "next;");
2184 } else {
2185 struct ds match = DS_EMPTY_INITIALIZER;
2186
2187 /* Commit the connection tracking entry if it's a new
2188 * connection that matches this ACL. After this commit,
2189 * the reply traffic is allowed by a flow we create at
2190 * priority 65535, defined earlier.
2191 *
2192 * It's also possible that a known connection was marked for
2193 * deletion after a policy was deleted, but the policy was
2194 * re-added while that connection is still known. We catch
2195 * that case here and un-set ct_label[0] (which will be done
2196 * by ct_commit in the "stateful" stage) to indicate that the
2197 * connection should be allowed to resume.
2198 */
2199 ds_put_format(&match, "((ct.new && !ct.est)"
2200 " || (!ct.new && ct.est && !ct.rpl "
2201 "&& ct_label[0] == 1)) "
2202 "&& (%s)", acl->match);
2203 ovn_lflow_add(lflows, od, stage,
2204 acl->priority + OVN_ACL_PRI_OFFSET,
2205 ds_cstr(&match),
2206 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2207
2208 /* Match on traffic in the request direction for an established
2209 * connection tracking entry that has not been marked for
2210 * deletion. There is no need to commit here, so we can just
2211 * proceed to the next table. We use this to ensure that this
2212 * connection is still allowed by the currently defined
2213 * policy. */
2214 ds_clear(&match);
2215 ds_put_format(&match,
2216 "!ct.new && ct.est && !ct.rpl"
2217 " && ct_label[0] == 0 && (%s)",
2218 acl->match);
2219 ovn_lflow_add(lflows, od, stage,
2220 acl->priority + OVN_ACL_PRI_OFFSET,
2221 ds_cstr(&match), "next;");
2222
2223 ds_destroy(&match);
2224 }
2225 } else if (!strcmp(acl->action, "drop")
2226 || !strcmp(acl->action, "reject")) {
78aab811
JP
2227 struct ds match = DS_EMPTY_INITIALIZER;
2228
cc58e1f2
RB
2229 /* XXX Need to support "reject", treat it as "drop;" for now. */
2230 if (!strcmp(acl->action, "reject")) {
2231 VLOG_INFO("reject is not a supported action");
2232 }
78aab811 2233
cc58e1f2
RB
2234 /* The implementation of "drop" differs if stateful ACLs are in
2235 * use for this datapath. In that case, the actions differ
2236 * depending on whether the connection was previously committed
2237 * to the connection tracker with ct_commit. */
2238 if (has_stateful) {
2239 /* If the packet is not part of an established connection, then
2240 * we can simply drop it. */
2241 ds_put_format(&match,
2242 "(!ct.est || (ct.est && ct_label[0] == 1)) "
2243 "&& (%s)",
2244 acl->match);
2245 ovn_lflow_add(lflows, od, stage, acl->priority +
2246 OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");
2247
2248 /* For an existing connection without ct_label set, we've
2249 * encountered a policy change. ACLs previously allowed
2250 * this connection and we committed the connection tracking
2251 * entry. Current policy says that we should drop this
2252 * connection. First, we set bit 0 of ct_label to indicate
2253 * that this connection is set for deletion. By not
2254 * specifying "next;", we implicitly drop the packet after
2255 * updating conntrack state. We would normally defer
2256 * ct_commit() to the "stateful" stage, but since we're
2257 * dropping the packet, we go ahead and do it here. */
2258 ds_clear(&match);
2259 ds_put_format(&match,
2260 "ct.est && ct_label[0] == 0 && (%s)",
2261 acl->match);
2262 ovn_lflow_add(lflows, od, stage,
2263 acl->priority + OVN_ACL_PRI_OFFSET,
2264 ds_cstr(&match), "ct_commit(ct_label=1/1);");
2265
2266 ds_destroy(&match);
2267 } else {
2268 /* There are no stateful ACLs in use on this datapath,
2269 * so a "drop" ACL is simply the "drop" logical flow action
2270 * in all cases. */
2271 ovn_lflow_add(lflows, od, stage,
2272 acl->priority + OVN_ACL_PRI_OFFSET,
2273 acl->match, "drop;");
2274 }
78aab811
JP
2275 }
2276 }
281977f7
NS
2277
2278 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
2279 * logical ports of the datapath if the CMS has configured DHCPv4 options*/
2280 if (od->nbs && od->nbs->n_ports) {
2281 for (size_t i = 0; i < od->nbs->n_ports; i++) {
2282 if (od->nbs->ports[i]->dhcpv4_options) {
2283 const char *server_id = smap_get(
2284 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
2285 const char *server_mac = smap_get(
2286 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
2287 const char *lease_time = smap_get(
2288 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
2289 const char *router = smap_get(
2290 &od->nbs->ports[i]->dhcpv4_options->options, "router");
2291 if (server_id && server_mac && lease_time && router) {
2292 struct ds match = DS_EMPTY_INITIALIZER;
2293 const char *actions =
2294 has_stateful ? "ct_commit; next;" : "next;";
2295 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
2296 "&& ip4.src == %s && udp && udp.src == 67 "
2297 "&& udp.dst == 68", od->nbs->ports[i]->name,
2298 server_mac, server_id);
2299 ovn_lflow_add(
2300 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
2301 actions);
2302 }
2303 }
33ac3c83
NS
2304
2305 if (od->nbs->ports[i]->dhcpv6_options) {
2306 const char *server_mac = smap_get(
2307 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
2308 struct eth_addr ea;
2309 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
2310 /* Get the link local IP of the DHCPv6 server from the
2311 * server MAC. */
2312 struct in6_addr lla;
2313 in6_generate_lla(ea, &lla);
2314
2315 char server_ip[INET6_ADDRSTRLEN + 1];
2316 ipv6_string_mapped(server_ip, &lla);
2317
2318 struct ds match = DS_EMPTY_INITIALIZER;
2319 const char *actions = has_stateful ? "ct_commit; next;" :
2320 "next;";
2321 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
2322 "&& ip6.src == %s && udp && udp.src == 547 "
2323 "&& udp.dst == 546", od->nbs->ports[i]->name,
2324 server_mac, server_ip);
2325 ovn_lflow_add(
2326 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
2327 actions);
2328 }
2329 }
281977f7
NS
2330 }
2331 }
78aab811
JP
2332}
2333
7a15be69
GS
2334static void
2335build_lb(struct ovn_datapath *od, struct hmap *lflows)
2336{
2337 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
2338 * default. */
2339 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
2340 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
2341
2342 if (od->nbs->load_balancer) {
2343 /* Ingress and Egress LB Table (Priority 65535).
2344 *
2345 * Send established traffic through conntrack for just NAT. */
2346 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
2347 "ct.est && !ct.rel && !ct.new && !ct.inv",
2348 REGBIT_CONNTRACK_NAT" = 1; next;");
2349 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
2350 "ct.est && !ct.rel && !ct.new && !ct.inv",
2351 REGBIT_CONNTRACK_NAT" = 1; next;");
2352 }
2353}
2354
fa313a8c
GS
2355static void
2356build_stateful(struct ovn_datapath *od, struct hmap *lflows)
2357{
2358 /* Ingress and Egress stateful Table (Priority 0): Packets are
2359 * allowed by default. */
2360 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
2361 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
2362
2363 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
cc58e1f2
RB
2364 * committed to conntrack. We always set ct_label[0] to 0 here as
2365 * any packet that makes it this far is part of a connection we
2366 * want to allow to continue. */
fa313a8c 2367 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
cc58e1f2 2368 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
fa313a8c 2369 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
cc58e1f2 2370 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
7a15be69
GS
2371
2372 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
2373 * through nat (without committing).
2374 *
2375 * REGBIT_CONNTRACK_COMMIT is set for new connections and
2376 * REGBIT_CONNTRACK_NAT is set for established connections. So they
2377 * don't overlap.
2378 */
2379 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
2380 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
2381 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
2382 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
2383
2384 /* Load balancing rules for new connections get committed to conntrack
2385 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
2386 * a higher priority rule for load balancing below also commits the
2387 * connection, so it is okay if we do not hit the above match on
2388 * REGBIT_CONNTRACK_COMMIT. */
61591ad9
GS
2389 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2390 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
2391 struct smap *vips = &lb->vips;
2392 struct smap_node *node;
2393
2394 SMAP_FOR_EACH (node, vips) {
2395 uint16_t port = 0;
2396
2397 /* node->key contains IP:port or just IP. */
2398 char *ip_address = NULL;
2399 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2400 if (!ip_address) {
2401 continue;
2402 }
2403
2404 /* New connections in Ingress table. */
2405 char *action = xasprintf("ct_lb(%s);", node->value);
2406 struct ds match = DS_EMPTY_INITIALIZER;
2407 ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
2408 if (port) {
2409 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
2410 ds_put_format(&match, "&& udp && udp.dst == %d", port);
2411 } else {
2412 ds_put_format(&match, "&& tcp && tcp.dst == %d", port);
2413 }
2414 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
2415 120, ds_cstr(&match), action);
2416 } else {
2417 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
2418 110, ds_cstr(&match), action);
2419 }
2420
2421 ds_destroy(&match);
2422 free(action);
2423 }
2424 }
fa313a8c
GS
2425}
2426
bd39395f 2427static void
9975d7be
BP
2428build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
2429 struct hmap *lflows, struct hmap *mcgroups)
bd39395f 2430{
5cff6b99
BP
2431 /* This flow table structure is documented in ovn-northd(8), so please
2432 * update ovn-northd.8.xml if you change anything. */
2433
09b39248
JP
2434 struct ds match = DS_EMPTY_INITIALIZER;
2435 struct ds actions = DS_EMPTY_INITIALIZER;
2436
9975d7be 2437 /* Build pre-ACL and ACL tables for both ingress and egress.
685f4dfe 2438 * Ingress tables 3 and 4. Egress tables 0 and 1. */
5868eb24
BP
2439 struct ovn_datapath *od;
2440 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2441 if (!od->nbs) {
2442 continue;
2443 }
2444
2d018f9b 2445 build_pre_acls(od, lflows, ports);
7a15be69 2446 build_pre_lb(od, lflows);
facf8652 2447 build_pre_stateful(od, lflows);
2d018f9b 2448 build_acls(od, lflows);
7a15be69 2449 build_lb(od, lflows);
fa313a8c 2450 build_stateful(od, lflows);
9975d7be
BP
2451 }
2452
2453 /* Logical switch ingress table 0: Admission control framework (priority
2454 * 100). */
2455 HMAP_FOR_EACH (od, key_node, datapaths) {
2456 if (!od->nbs) {
2457 continue;
2458 }
2459
bd39395f 2460 /* Logical VLANs not supported. */
685f4dfe 2461 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
091e3af9 2462 "drop;");
bd39395f
BP
2463
2464 /* Broadcast/multicast source address is invalid. */
685f4dfe 2465 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
091e3af9 2466 "drop;");
bd39395f 2467
35060cdc
BP
2468 /* Port security flows have priority 50 (see below) and will continue
2469 * to the next table if packet source is acceptable. */
bd39395f
BP
2470 }
2471
685f4dfe
NS
2472 /* Logical switch ingress table 0: Ingress port security - L2
2473 * (priority 50).
2474 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
2475 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
2476 */
5868eb24
BP
2477 struct ovn_port *op;
2478 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2479 if (!op->nbsp) {
9975d7be
BP
2480 continue;
2481 }
2482
0ee00741 2483 if (!lsp_is_enabled(op->nbsp)) {
96af668a
BP
2484 /* Drop packets from disabled logical ports (since logical flow
2485 * tables are default-drop). */
2486 continue;
2487 }
2488
09b39248 2489 ds_clear(&match);
9975d7be 2490 ds_put_format(&match, "inport == %s", op->json_key);
e93b43d6
JP
2491 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
2492 &match);
685f4dfe 2493 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
96af668a 2494 ds_cstr(&match), "next;");
685f4dfe 2495
0ee00741 2496 if (op->nbsp->n_port_security) {
685f4dfe
NS
2497 build_port_security_ip(P_IN, op, lflows);
2498 build_port_security_nd(op, lflows);
2499 }
2500 }
2501
2502 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
2503 * (priority 0)*/
2504 HMAP_FOR_EACH (od, key_node, datapaths) {
2505 if (!od->nbs) {
2506 continue;
2507 }
2508
2509 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
2510 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
5868eb24 2511 }
445a266a 2512
281977f7
NS
2513 /* Ingress table 9: ARP/ND responder, skip requests coming from localnet
2514 * ports. (priority 100). */
fa128126 2515 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2516 if (!op->nbsp) {
fa128126
HZ
2517 continue;
2518 }
2519
0ee00741 2520 if (!strcmp(op->nbsp->type, "localnet")) {
09b39248
JP
2521 ds_clear(&match);
2522 ds_put_format(&match, "inport == %s", op->json_key);
e75451fe 2523 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
09b39248 2524 ds_cstr(&match), "next;");
fa128126
HZ
2525 }
2526 }
2527
94300e09 2528 /* Ingress table 9: ARP/ND responder, reply for known IPs.
fa128126 2529 * (priority 50). */
57d143eb 2530 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2531 if (!op->nbsp) {
57d143eb
HZ
2532 continue;
2533 }
2534
4c7bf534 2535 /*
e75451fe 2536 * Add ARP/ND reply flows if either the
4c7bf534
NS
2537 * - port is up or
2538 * - port type is router
2539 */
0ee00741 2540 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
4c7bf534
NS
2541 continue;
2542 }
2543
e93b43d6
JP
2544 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2545 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
09b39248 2546 ds_clear(&match);
e93b43d6
JP
2547 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
2548 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
09b39248
JP
2549 ds_clear(&actions);
2550 ds_put_format(&actions,
57d143eb 2551 "eth.dst = eth.src; "
e93b43d6 2552 "eth.src = %s; "
57d143eb
HZ
2553 "arp.op = 2; /* ARP reply */ "
2554 "arp.tha = arp.sha; "
e93b43d6 2555 "arp.sha = %s; "
57d143eb 2556 "arp.tpa = arp.spa; "
e93b43d6 2557 "arp.spa = %s; "
57d143eb 2558 "outport = inport; "
bf143492 2559 "flags.loopback = 1; "
57d143eb 2560 "output;",
e93b43d6
JP
2561 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
2562 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
e75451fe 2563 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 2564 ds_cstr(&match), ds_cstr(&actions));
57d143eb 2565 }
7dc88496 2566
6fdb7cd6
JP
2567 /* For ND solicitations, we need to listen for both the
2568 * unicast IPv6 address and its all-nodes multicast address,
2569 * but always respond with the unicast IPv6 address. */
2570 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
09b39248 2571 ds_clear(&match);
6fdb7cd6
JP
2572 ds_put_format(&match,
2573 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
2574 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
2575 op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
2576 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
2577
09b39248
JP
2578 ds_clear(&actions);
2579 ds_put_format(&actions,
6fdb7cd6
JP
2580 "nd_na { "
2581 "eth.src = %s; "
2582 "ip6.src = %s; "
2583 "nd.target = %s; "
2584 "nd.tll = %s; "
2585 "outport = inport; "
bf143492 2586 "flags.loopback = 1; "
6fdb7cd6
JP
2587 "output; "
2588 "};",
2589 op->lsp_addrs[i].ea_s,
2590 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
2591 op->lsp_addrs[i].ipv6_addrs[j].addr_s,
2592 op->lsp_addrs[i].ea_s);
e75451fe 2593 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 2594 ds_cstr(&match), ds_cstr(&actions));
e75451fe 2595 }
57d143eb
HZ
2596 }
2597 }
2598
94300e09 2599 /* Ingress table 9: ARP/ND responder, by default goto next.
fa128126
HZ
2600 * (priority 0)*/
2601 HMAP_FOR_EACH (od, key_node, datapaths) {
2602 if (!od->nbs) {
2603 continue;
2604 }
2605
e75451fe 2606 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
fa128126
HZ
2607 }
2608
281977f7
NS
2609 /* Logical switch ingress table 10 and 11: DHCP options and response
2610 * priority 100 flows. */
2611 HMAP_FOR_EACH (op, key_node, ports) {
2612 if (!op->nbsp) {
2613 continue;
2614 }
2615
2616 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
2617 /* Don't add the DHCP flows if the port is not enabled or if the
2618 * port is a router port. */
2619 continue;
2620 }
2621
33ac3c83
NS
2622 if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
2623 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
2624 */
281977f7
NS
2625 continue;
2626 }
2627
2628 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2629 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2630 struct ds options_action = DS_EMPTY_INITIALIZER;
2631 struct ds response_action = DS_EMPTY_INITIALIZER;
2632 if (build_dhcpv4_action(
2633 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
2634 &options_action, &response_action)) {
2635 struct ds match = DS_EMPTY_INITIALIZER;
2636 ds_put_format(
2637 &match, "inport == %s && eth.src == %s && "
2638 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
2639 "udp.src == 68 && udp.dst == 67", op->json_key,
2640 op->lsp_addrs[i].ea_s);
2641
2642 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
2643 100, ds_cstr(&match),
2644 ds_cstr(&options_action));
2645 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
2646 * put_dhcp_opts action is successful */
2647 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
2648 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
2649 100, ds_cstr(&match),
2650 ds_cstr(&response_action));
2651 ds_destroy(&match);
2652 ds_destroy(&options_action);
2653 ds_destroy(&response_action);
2654 break;
2655 }
2656 }
33ac3c83
NS
2657
2658 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
2659 struct ds options_action = DS_EMPTY_INITIALIZER;
2660 struct ds response_action = DS_EMPTY_INITIALIZER;
2661 if (build_dhcpv6_action(
2662 op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
2663 &options_action, &response_action)) {
2664 struct ds match = DS_EMPTY_INITIALIZER;
2665 ds_put_format(
2666 &match, "inport == %s && eth.src == %s"
2667 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
2668 " udp.dst == 547", op->json_key,
2669 op->lsp_addrs[i].ea_s);
2670
2671 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
2672 ds_cstr(&match), ds_cstr(&options_action));
2673
2674 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
2675 * put_dhcpv6_opts action is successful */
2676 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
2677 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
2678 ds_cstr(&match), ds_cstr(&response_action));
2679 ds_destroy(&match);
2680 ds_destroy(&options_action);
2681 ds_destroy(&response_action);
2682 break;
2683 }
2684 }
281977f7
NS
2685 }
2686 }
2687
2688 /* Ingress table 10 and 11: DHCP options and response, by default goto next.
2689 * (priority 0). */
2690
2691 HMAP_FOR_EACH (od, key_node, datapaths) {
2692 if (!od->nbs) {
2693 continue;
2694 }
2695
2696 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
2697 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
2698 }
2699
2700 /* Ingress table 12: Destination lookup, broadcast and multicast handling
5868eb24
BP
2701 * (priority 100). */
2702 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2703 if (!op->nbsp) {
9975d7be
BP
2704 continue;
2705 }
2706
0ee00741 2707 if (lsp_is_enabled(op->nbsp)) {
9975d7be 2708 ovn_multicast_add(mcgroups, &mc_flood, op);
445a266a 2709 }
5868eb24
BP
2710 }
2711 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2712 if (!od->nbs) {
2713 continue;
2714 }
2715
2716 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
5868eb24 2717 "outport = \""MC_FLOOD"\"; output;");
bd39395f 2718 }
bd39395f 2719
281977f7 2720 /* Ingress table 12: Destination lookup, unicast handling (priority 50), */
5868eb24 2721 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2722 if (!op->nbsp) {
9975d7be
BP
2723 continue;
2724 }
2725
0ee00741 2726 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
74ff3298 2727 struct eth_addr mac;
5868eb24 2728
0ee00741 2729 if (eth_addr_from_string(op->nbsp->addresses[i], &mac)) {
09b39248 2730 ds_clear(&match);
9975d7be
BP
2731 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
2732 ETH_ADDR_ARGS(mac));
5868eb24 2733
09b39248 2734 ds_clear(&actions);
9975d7be
BP
2735 ds_put_format(&actions, "outport = %s; output;", op->json_key);
2736 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
5868eb24 2737 ds_cstr(&match), ds_cstr(&actions));
0ee00741
HK
2738 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
2739 if (lsp_is_enabled(op->nbsp)) {
9975d7be 2740 ovn_multicast_add(mcgroups, &mc_unknown, op);
96af668a
BP
2741 op->od->has_unknown = true;
2742 }
8639f9be
ND
2743 } else if (!strcmp(op->nbsp->addresses[i], "dynamic")) {
2744 if (!op->nbsp->dynamic_addresses
2745 || !eth_addr_from_string(op->nbsp->dynamic_addresses,
2746 &mac)) {
2747 continue;
2748 }
2749 ds_clear(&match);
2750 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
2751 ETH_ADDR_ARGS(mac));
2752
2753 ds_clear(&actions);
2754 ds_put_format(&actions, "outport = %s; output;", op->json_key);
2755 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
2756 ds_cstr(&match), ds_cstr(&actions));
5868eb24
BP
2757 } else {
2758 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
445a266a 2759
2fa326a3
BP
2760 VLOG_INFO_RL(&rl,
2761 "%s: invalid syntax '%s' in addresses column",
0ee00741 2762 op->nbsp->name, op->nbsp->addresses[i]);
445a266a
BP
2763 }
2764 }
bd39395f
BP
2765 }
2766
281977f7 2767 /* Ingress table 12: Destination lookup for unknown MACs (priority 0). */
5868eb24 2768 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2769 if (!od->nbs) {
2770 continue;
2771 }
2772
5868eb24 2773 if (od->has_unknown) {
9975d7be 2774 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
5868eb24 2775 "outport = \""MC_UNKNOWN"\"; output;");
445a266a 2776 }
bd39395f
BP
2777 }
2778
94300e09
JP
2779 /* Egress tables 6: Egress port security - IP (priority 0)
2780 * Egress table 7: Egress port security L2 - multicast/broadcast
2781 * (priority 100). */
5868eb24 2782 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2783 if (!od->nbs) {
2784 continue;
2785 }
2786
685f4dfe
NS
2787 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
2788 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
091e3af9 2789 "output;");
48f42f3a
RB
2790 }
2791
94300e09 2792 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
685f4dfe
NS
2793 * if port security enabled.
2794 *
94300e09 2795 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
d770a830
BP
2796 *
2797 * Priority 50 rules implement port security for enabled logical port.
2798 *
2799 * Priority 150 rules drop packets to disabled logical ports, so that they
2800 * don't even receive multicast or broadcast packets. */
5868eb24 2801 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2802 if (!op->nbsp) {
9975d7be
BP
2803 continue;
2804 }
2805
09b39248 2806 ds_clear(&match);
9975d7be 2807 ds_put_format(&match, "outport == %s", op->json_key);
0ee00741 2808 if (lsp_is_enabled(op->nbsp)) {
e93b43d6
JP
2809 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
2810 &match);
685f4dfe 2811 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
d770a830
BP
2812 ds_cstr(&match), "output;");
2813 } else {
685f4dfe 2814 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
d770a830
BP
2815 ds_cstr(&match), "drop;");
2816 }
eb00399e 2817
0ee00741 2818 if (op->nbsp->n_port_security) {
685f4dfe
NS
2819 build_port_security_ip(P_OUT, op, lflows);
2820 }
eb00399e 2821 }
09b39248
JP
2822
2823 ds_destroy(&match);
2824 ds_destroy(&actions);
9975d7be 2825}
eb00399e 2826
9975d7be
BP
2827static bool
2828lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
2829{
2830 return !lrport->enabled || *lrport->enabled;
2831}
2832
4685e523
JP
2833/* Returns a string of the IP address of the router port 'op' that
2834 * overlaps with 'ip_s". If one is not found, returns NULL.
2835 *
2836 * The caller must not free the returned string. */
2837static const char *
2838find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
2839{
6fdb7cd6 2840 bool is_ipv4 = strchr(ip_s, '.') ? true : false;
4685e523 2841
6fdb7cd6
JP
2842 if (is_ipv4) {
2843 ovs_be32 ip;
4685e523 2844
6fdb7cd6
JP
2845 if (!ip_parse(ip_s, &ip)) {
2846 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2847 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
2848 return NULL;
2849 }
4685e523 2850
6fdb7cd6
JP
2851 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2852 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
2853
2854 if (!((na->network ^ ip) & na->mask)) {
2855 /* There should be only 1 interface that matches the
2856 * supplied IP. Otherwise, it's a configuration error,
2857 * because subnets of a router's interfaces should NOT
2858 * overlap. */
2859 return na->addr_s;
2860 }
2861 }
2862 } else {
2863 struct in6_addr ip6;
2864
2865 if (!ipv6_parse(ip_s, &ip6)) {
2866 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2867 VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
2868 return NULL;
2869 }
2870
2871 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
2872 const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
2873 struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
2874 struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);
2875
2876 if (ipv6_is_zero(&and_addr)) {
2877 /* There should be only 1 interface that matches the
2878 * supplied IP. Otherwise, it's a configuration error,
2879 * because subnets of a router's interfaces should NOT
2880 * overlap. */
2881 return na->addr_s;
2882 }
4685e523
JP
2883 }
2884 }
2885
2886 return NULL;
2887}
2888
9975d7be 2889static void
0bac7164 2890add_route(struct hmap *lflows, const struct ovn_port *op,
4685e523
JP
2891 const char *lrp_addr_s, const char *network_s, int plen,
2892 const char *gateway)
9975d7be 2893{
6fdb7cd6 2894 bool is_ipv4 = strchr(network_s, '.') ? true : false;
a63f7235 2895 struct ds match = DS_EMPTY_INITIALIZER;
6fdb7cd6 2896
a63f7235
JP
2897 /* IPv6 link-local addresses must be scoped to the local router port. */
2898 if (!is_ipv4) {
2899 struct in6_addr network;
2900 ovs_assert(ipv6_parse(network_s, &network));
2901 if (in6_is_lla(&network)) {
2902 ds_put_format(&match, "inport == %s && ", op->json_key);
2903 }
2904 }
2905 ds_put_format(&match, "ip%s.dst == %s/%d", is_ipv4 ? "4" : "6",
2906 network_s, plen);
9975d7be
BP
2907
2908 struct ds actions = DS_EMPTY_INITIALIZER;
6fdb7cd6
JP
2909 ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");
2910
9975d7be 2911 if (gateway) {
c9bdf7bd 2912 ds_put_cstr(&actions, gateway);
9975d7be 2913 } else {
6fdb7cd6 2914 ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
9975d7be 2915 }
4685e523 2916 ds_put_format(&actions, "; "
6fdb7cd6 2917 "%sreg1 = %s; "
4685e523 2918 "eth.src = %s; "
0bac7164 2919 "outport = %s; "
bf143492 2920 "flags.loopback = 1; "
0bac7164 2921 "next;",
6fdb7cd6 2922 is_ipv4 ? "" : "xx",
4685e523
JP
2923 lrp_addr_s,
2924 op->lrp_networks.ea_s,
2925 op->json_key);
9975d7be
BP
2926
2927 /* The priority here is calculated to implement longest-prefix-match
2928 * routing. */
a63f7235
JP
2929 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen,
2930 ds_cstr(&match), ds_cstr(&actions));
2931 ds_destroy(&match);
9975d7be 2932 ds_destroy(&actions);
9975d7be
BP
2933}
2934
28dc3fe9
SR
2935static void
2936build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
2937 struct hmap *ports,
2938 const struct nbrec_logical_router_static_route *route)
2939{
6fdb7cd6 2940 ovs_be32 nexthop;
4685e523 2941 const char *lrp_addr_s;
6fdb7cd6
JP
2942 unsigned int plen;
2943 bool is_ipv4;
28dc3fe9 2944
6fdb7cd6
JP
2945 /* Verify that the next hop is an IP address with an all-ones mask. */
2946 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
2947 if (!error) {
2948 if (plen != 32) {
2949 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2950 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
2951 return;
2952 }
2953 is_ipv4 = true;
2954 } else {
28dc3fe9 2955 free(error);
6fdb7cd6
JP
2956
2957 struct in6_addr ip6;
2958 char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
2959 if (!error) {
2960 if (plen != 128) {
2961 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2962 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
2963 return;
2964 }
2965 is_ipv4 = false;
2966 } else {
2967 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2968 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
2969 free(error);
2970 return;
2971 }
28dc3fe9
SR
2972 }
2973
6fdb7cd6
JP
2974 char *prefix_s;
2975 if (is_ipv4) {
2976 ovs_be32 prefix;
2977 /* Verify that ip prefix is a valid IPv4 address. */
2978 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
2979 if (error) {
2980 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2981 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
2982 route->ip_prefix);
2983 free(error);
2984 return;
2985 }
2986 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
2987 } else {
2988 /* Verify that ip prefix is a valid IPv6 address. */
2989 struct in6_addr prefix;
2990 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
2991 if (error) {
2992 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2993 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
2994 route->ip_prefix);
2995 free(error);
2996 return;
2997 }
2998 struct in6_addr mask = ipv6_create_mask(plen);
2999 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
3000 prefix_s = xmalloc(INET6_ADDRSTRLEN);
3001 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
28dc3fe9
SR
3002 }
3003
3004 /* Find the outgoing port. */
3005 struct ovn_port *out_port = NULL;
3006 if (route->output_port) {
3007 out_port = ovn_port_find(ports, route->output_port);
3008 if (!out_port) {
3009 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3010 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
3011 route->output_port, route->ip_prefix);
6fdb7cd6 3012 goto free_prefix_s;
28dc3fe9 3013 }
4685e523 3014 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
28dc3fe9
SR
3015 } else {
3016 /* output_port is not specified, find the
3017 * router port matching the next hop. */
3018 int i;
3019 for (i = 0; i < od->nbr->n_ports; i++) {
3020 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
3021 out_port = ovn_port_find(ports, lrp->name);
3022 if (!out_port) {
3023 /* This should not happen. */
3024 continue;
3025 }
3026
4685e523
JP
3027 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3028 if (lrp_addr_s) {
28dc3fe9
SR
3029 break;
3030 }
3031 }
28dc3fe9
SR
3032 }
3033
4685e523
JP
3034 if (!lrp_addr_s) {
3035 /* There is no matched out port. */
3036 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3037 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
3038 route->ip_prefix, route->nexthop);
6fdb7cd6 3039 goto free_prefix_s;
4685e523
JP
3040 }
3041
6fdb7cd6
JP
3042 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop);
3043
3044free_prefix_s:
c9bdf7bd 3045 free(prefix_s);
28dc3fe9
SR
3046}
3047
4685e523 3048static void
6fdb7cd6 3049op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4685e523
JP
3050{
3051 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
3052 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
3053 return;
3054 }
3055
3056 ds_put_cstr(ds, "{");
3057 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3058 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
3059 if (add_bcast) {
3060 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
3061 }
3062 }
3063 ds_chomp(ds, ' ');
3064 ds_chomp(ds, ',');
3065 ds_put_cstr(ds, "}");
3066}
3067
6fdb7cd6
JP
3068static void
3069op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
3070{
3071 if (op->lrp_networks.n_ipv6_addrs == 1) {
3072 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
3073 return;
3074 }
3075
3076 ds_put_cstr(ds, "{");
3077 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3078 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
3079 }
3080 ds_chomp(ds, ' ');
3081 ds_chomp(ds, ',');
3082 ds_put_cstr(ds, "}");
3083}
3084
9975d7be
BP
3085static void
3086build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
3087 struct hmap *lflows)
3088{
3089 /* This flow table structure is documented in ovn-northd(8), so please
3090 * update ovn-northd.8.xml if you change anything. */
3091
09b39248
JP
3092 struct ds match = DS_EMPTY_INITIALIZER;
3093 struct ds actions = DS_EMPTY_INITIALIZER;
3094
9975d7be
BP
3095 /* Logical router ingress table 0: Admission control framework. */
3096 struct ovn_datapath *od;
3097 HMAP_FOR_EACH (od, key_node, datapaths) {
3098 if (!od->nbr) {
3099 continue;
3100 }
3101
3102 /* Logical VLANs not supported.
3103 * Broadcast/multicast source address is invalid. */
3104 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
3105 "vlan.present || eth.src[40]", "drop;");
3106 }
3107
3108 /* Logical router ingress table 0: match (priority 50). */
3109 struct ovn_port *op;
3110 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3111 if (!op->nbrp) {
9975d7be
BP
3112 continue;
3113 }
3114
0ee00741 3115 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
3116 /* Drop packets from disabled logical ports (since logical flow
3117 * tables are default-drop). */
3118 continue;
3119 }
3120
09b39248 3121 ds_clear(&match);
4685e523
JP
3122 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
3123 op->lrp_networks.ea_s, op->json_key);
9975d7be 3124 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 3125 ds_cstr(&match), "next;");
9975d7be
BP
3126 }
3127
3128 /* Logical router ingress table 1: IP Input. */
78aab811 3129 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3130 if (!od->nbr) {
3131 continue;
3132 }
3133
3134 /* L3 admission control: drop multicast and broadcast source, localhost
3135 * source or destination, and zero network source or destination
3136 * (priority 100). */
3137 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
3138 "ip4.mcast || "
3139 "ip4.src == 255.255.255.255 || "
3140 "ip4.src == 127.0.0.0/8 || "
3141 "ip4.dst == 127.0.0.0/8 || "
3142 "ip4.src == 0.0.0.0/8 || "
3143 "ip4.dst == 0.0.0.0/8",
3144 "drop;");
3145
0bac7164
BP
3146 /* ARP reply handling. Use ARP replies to populate the logical
3147 * router's ARP table. */
3148 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
3149 "put_arp(inport, arp.spa, arp.sha);");
3150
9975d7be
BP
3151 /* Drop Ethernet local broadcast. By definition this traffic should
3152 * not be forwarded.*/
3153 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
3154 "eth.bcast", "drop;");
3155
9975d7be
BP
3156 /* TTL discard.
3157 *
3158 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
3159 ds_clear(&match);
3160 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
3161 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
3162 ds_cstr(&match), "drop;");
9975d7be 3163
c34a87b6
JP
3164 /* ND advertisement handling. Use advertisements to populate
3165 * the logical router's ARP/ND table. */
3166 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
3167 "put_nd(inport, nd.target, nd.tll);");
3168
3169 /* Lean from neighbor solicitations that were not directed at
3170 * us. (A priority-90 flow will respond to requests to us and
3171 * learn the sender's mac address. */
3172 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
3173 "put_nd(inport, ip6.src, nd.sll);");
3174
9975d7be
BP
3175 /* Pass other traffic not already handled to the next table for
3176 * routing. */
3177 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
3178 }
3179
6fdb7cd6 3180 /* Logical router ingress table 1: IP Input for IPv4. */
9975d7be 3181 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3182 if (!op->nbrp) {
9975d7be
BP
3183 continue;
3184 }
3185
9975d7be 3186
6fdb7cd6
JP
3187 if (op->lrp_networks.n_ipv4_addrs) {
3188 /* L3 admission control: drop packets that originate from an
3189 * IPv4 address owned by the router or a broadcast address
3190 * known to the router (priority 100). */
3191 ds_clear(&match);
3192 ds_put_cstr(&match, "ip4.src == ");
3193 op_put_v4_networks(&match, op, true);
3194 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3195 ds_cstr(&match), "drop;");
3196
3197 /* ICMP echo reply. These flows reply to ICMP echo requests
3198 * received for the router's IP address. Since packets only
3199 * get here as part of the logical router datapath, the inport
3200 * (i.e. the incoming locally attached net) does not matter.
3201 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
3202 ds_clear(&match);
3203 ds_put_cstr(&match, "ip4.dst == ");
3204 op_put_v4_networks(&match, op, false);
3205 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
3206
3207 ds_clear(&actions);
3208 ds_put_format(&actions,
3209 "ip4.dst <-> ip4.src; "
3210 "ip.ttl = 255; "
3211 "icmp4.type = 0; "
bf143492 3212 "flags.loopback = 1; "
6fdb7cd6
JP
3213 "next; ");
3214 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3215 ds_cstr(&match), ds_cstr(&actions));
3216 }
dd7652e6 3217
9975d7be
BP
3218 /* ARP reply. These flows reply to ARP requests for the router's own
3219 * IP address. */
4685e523
JP
3220 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3221 ds_clear(&match);
3222 ds_put_format(&match,
3223 "inport == %s && arp.tpa == %s && arp.op == 1",
3224 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
3225
3226 ds_clear(&actions);
3227 ds_put_format(&actions,
3228 "eth.dst = eth.src; "
3229 "eth.src = %s; "
3230 "arp.op = 2; /* ARP reply */ "
3231 "arp.tha = arp.sha; "
3232 "arp.sha = %s; "
3233 "arp.tpa = arp.spa; "
3234 "arp.spa = %s; "
3235 "outport = %s; "
bf143492 3236 "flags.loopback = 1; "
4685e523
JP
3237 "output;",
3238 op->lrp_networks.ea_s,
3239 op->lrp_networks.ea_s,
3240 op->lrp_networks.ipv4_addrs[i].addr_s,
3241 op->json_key);
3242 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3243 ds_cstr(&match), ds_cstr(&actions));
3244 }
9975d7be 3245
dde5ea7b
GS
3246 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
3247 size_t n_snat_ips = 0;
de297547
GS
3248 for (int i = 0; i < op->od->nbr->n_nat; i++) {
3249 const struct nbrec_nat *nat;
3250
3251 nat = op->od->nbr->nat[i];
3252
de297547
GS
3253 ovs_be32 ip;
3254 if (!ip_parse(nat->external_ip, &ip) || !ip) {
3255 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
dde5ea7b 3256 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
de297547
GS
3257 "for router %s", nat->external_ip, op->key);
3258 continue;
3259 }
3260
dde5ea7b
GS
3261 if (!strcmp(nat->type, "snat")) {
3262 snat_ips[n_snat_ips++] = ip;
3263 continue;
3264 }
3265
3266 /* ARP handling for external IP addresses.
3267 *
3268 * DNAT IP addresses are external IP addresses that need ARP
3269 * handling. */
09b39248
JP
3270 ds_clear(&match);
3271 ds_put_format(&match,
3272 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3273 op->json_key, IP_ARGS(ip));
4685e523 3274
09b39248
JP
3275 ds_clear(&actions);
3276 ds_put_format(&actions,
de297547 3277 "eth.dst = eth.src; "
4685e523 3278 "eth.src = %s; "
de297547
GS
3279 "arp.op = 2; /* ARP reply */ "
3280 "arp.tha = arp.sha; "
4685e523 3281 "arp.sha = %s; "
de297547
GS
3282 "arp.tpa = arp.spa; "
3283 "arp.spa = "IP_FMT"; "
3284 "outport = %s; "
bf143492 3285 "flags.loopback = 1; "
de297547 3286 "output;",
4685e523
JP
3287 op->lrp_networks.ea_s,
3288 op->lrp_networks.ea_s,
de297547
GS
3289 IP_ARGS(ip),
3290 op->json_key);
3291 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 3292 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3293 }
3294
4685e523
JP
3295 ds_clear(&match);
3296 ds_put_cstr(&match, "ip4.dst == {");
3297 bool has_drop_ips = false;
3298 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
49da9ec0 3299 bool snat_ip_is_router_ip = false;
dde5ea7b
GS
3300 for (int j = 0; j < n_snat_ips; j++) {
3301 /* Packets to SNAT IPs should not be dropped. */
3302 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
49da9ec0
CSV
3303 snat_ip_is_router_ip = true;
3304 break;
4685e523 3305 }
4ef48e9d 3306 }
49da9ec0
CSV
3307 if (snat_ip_is_router_ip) {
3308 continue;
3309 }
4685e523
JP
3310 ds_put_format(&match, "%s, ",
3311 op->lrp_networks.ipv4_addrs[i].addr_s);
3312 has_drop_ips = true;
4ef48e9d 3313 }
4685e523
JP
3314 ds_chomp(&match, ' ');
3315 ds_chomp(&match, ',');
3316 ds_put_cstr(&match, "}");
4ef48e9d 3317
4685e523
JP
3318 if (has_drop_ips) {
3319 /* Drop IP traffic to this router. */
09b39248
JP
3320 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3321 ds_cstr(&match), "drop;");
4ef48e9d 3322 }
4685e523 3323
dde5ea7b 3324 free(snat_ips);
9975d7be
BP
3325 }
3326
6fdb7cd6
JP
3327 /* Logical router ingress table 1: IP Input for IPv6. */
3328 HMAP_FOR_EACH (op, key_node, ports) {
3329 if (!op->nbrp) {
3330 continue;
3331 }
3332
3333 if (op->lrp_networks.n_ipv6_addrs) {
3334 /* L3 admission control: drop packets that originate from an
3335 * IPv6 address owned by the router (priority 100). */
3336 ds_clear(&match);
3337 ds_put_cstr(&match, "ip6.src == ");
3338 op_put_v6_networks(&match, op);
3339 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3340 ds_cstr(&match), "drop;");
3341
3342 /* ICMPv6 echo reply. These flows reply to echo requests
3343 * received for the router's IP address. */
3344 ds_clear(&match);
3345 ds_put_cstr(&match, "ip6.dst == ");
3346 op_put_v6_networks(&match, op);
3347 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
3348
3349 ds_clear(&actions);
3350 ds_put_cstr(&actions,
3351 "ip6.dst <-> ip6.src; "
3352 "ip.ttl = 255; "
3353 "icmp6.type = 129; "
bf143492 3354 "flags.loopback = 1; "
6fdb7cd6
JP
3355 "next; ");
3356 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3357 ds_cstr(&match), ds_cstr(&actions));
3358
3359 /* Drop IPv6 traffic to this router. */
3360 ds_clear(&match);
3361 ds_put_cstr(&match, "ip6.dst == ");
3362 op_put_v6_networks(&match, op);
3363 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3364 ds_cstr(&match), "drop;");
3365 }
3366
3367 /* ND reply. These flows reply to ND solicitations for the
3368 * router's own IP address. */
3369 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3370 ds_clear(&match);
3371 ds_put_format(&match,
3372 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
3373 "&& nd.target == %s",
3374 op->json_key,
3375 op->lrp_networks.ipv6_addrs[i].addr_s,
3376 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
3377 op->lrp_networks.ipv6_addrs[i].addr_s);
3378
3379 ds_clear(&actions);
3380 ds_put_format(&actions,
c34a87b6 3381 "put_nd(inport, ip6.src, nd.sll); "
6fdb7cd6
JP
3382 "nd_na { "
3383 "eth.src = %s; "
3384 "ip6.src = %s; "
3385 "nd.target = %s; "
3386 "nd.tll = %s; "
3387 "outport = inport; "
bf143492 3388 "flags.loopback = 1; "
6fdb7cd6
JP
3389 "output; "
3390 "};",
3391 op->lrp_networks.ea_s,
3392 op->lrp_networks.ipv6_addrs[i].addr_s,
3393 op->lrp_networks.ipv6_addrs[i].addr_s,
3394 op->lrp_networks.ea_s);
3395 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3396 ds_cstr(&match), ds_cstr(&actions));
3397 }
3398 }
3399
de297547
GS
3400 /* NAT in Gateway routers. */
3401 HMAP_FOR_EACH (od, key_node, datapaths) {
3402 if (!od->nbr) {
3403 continue;
3404 }
3405
3406 /* Packets are allowed by default. */
3407 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
3408 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
3409 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
3410
3411 /* NAT rules are only valid on Gateway routers. */
3412 if (!smap_get(&od->nbr->options, "chassis")) {
3413 continue;
3414 }
3415
3416 for (int i = 0; i < od->nbr->n_nat; i++) {
3417 const struct nbrec_nat *nat;
3418
3419 nat = od->nbr->nat[i];
3420
3421 ovs_be32 ip, mask;
3422
3423 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
3424 if (error || mask != OVS_BE32_MAX) {
3425 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3426 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
3427 nat->external_ip);
3428 free(error);
3429 continue;
3430 }
3431
3432 /* Check the validity of nat->logical_ip. 'logical_ip' can
3433 * be a subnet when the type is "snat". */
3434 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
3435 if (!strcmp(nat->type, "snat")) {
3436 if (error) {
3437 static struct vlog_rate_limit rl =
3438 VLOG_RATE_LIMIT_INIT(5, 1);
3439 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
3440 "in router "UUID_FMT"",
3441 nat->logical_ip, UUID_ARGS(&od->key));
3442 free(error);
3443 continue;
3444 }
3445 } else {
3446 if (error || mask != OVS_BE32_MAX) {
3447 static struct vlog_rate_limit rl =
3448 VLOG_RATE_LIMIT_INIT(5, 1);
3449 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
3450 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
3451 free(error);
3452 continue;
3453 }
3454 }
3455
de297547
GS
3456 /* Ingress UNSNAT table: It is for already established connections'
3457 * reverse traffic. i.e., SNAT has already been done in egress
3458 * pipeline and now the packet has entered the ingress pipeline as
3459 * part of a reply. We undo the SNAT here.
3460 *
3461 * Undoing SNAT has to happen before DNAT processing. This is
3462 * because when the packet was DNATed in ingress pipeline, it did
3463 * not know about the possibility of eventual additional SNAT in
3464 * egress pipeline. */
3465 if (!strcmp(nat->type, "snat")
3466 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
3467 ds_clear(&match);
3468 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
de297547 3469 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
09b39248 3470 ds_cstr(&match), "ct_snat; next;");
de297547
GS
3471 }
3472
3473 /* Ingress DNAT table: Packets enter the pipeline with destination
3474 * IP address that needs to be DNATted from a external IP address
3475 * to a logical IP address. */
3476 if (!strcmp(nat->type, "dnat")
3477 || !strcmp(nat->type, "dnat_and_snat")) {
3478 /* Packet when it goes from the initiator to destination.
3479 * We need to zero the inport because the router can
3480 * send the packet back through the same interface. */
09b39248
JP
3481 ds_clear(&match);
3482 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
3483 ds_clear(&actions);
bf143492 3484 ds_put_format(&actions,"flags.loopback = 1; ct_dnat(%s);",
09b39248 3485 nat->logical_ip);
de297547 3486 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
09b39248 3487 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3488 }
3489
3490 /* Egress SNAT table: Packets enter the egress pipeline with
3491 * source ip address that needs to be SNATted to a external ip
3492 * address. */
3493 if (!strcmp(nat->type, "snat")
3494 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
3495 ds_clear(&match);
3496 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
3497 ds_clear(&actions);
3498 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
de297547
GS
3499
3500 /* The priority here is calculated such that the
3501 * nat->logical_ip with the longest mask gets a higher
3502 * priority. */
3503 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
09b39248
JP
3504 count_1bits(ntohl(mask)) + 1,
3505 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3506 }
3507 }
3508
3509 /* Re-circulate every packet through the DNAT zone.
3510 * This helps with two things.
3511 *
3512 * 1. Any packet that needs to be unDNATed in the reverse
3513 * direction gets unDNATed. Ideally this could be done in
3514 * the egress pipeline. But since the gateway router
3515 * does not have any feature that depends on the source
3516 * ip address being external IP address for IP routing,
3517 * we can do it here, saving a future re-circulation.
3518 *
3519 * 2. Any packet that was sent through SNAT zone in the
3520 * previous table automatically gets re-circulated to get
3521 * back the new destination IP address that is needed for
3522 * routing in the openflow pipeline. */
3523 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
bf143492 3524 "ip", "flags.loopback = 1; ct_dnat;");
de297547
GS
3525 }
3526
94300e09 3527 /* Logical router ingress table 4: IP Routing.
9975d7be
BP
3528 *
3529 * A packet that arrives at this table is an IP packet that should be
6fdb7cd6
JP
3530 * routed to the address in 'ip[46].dst'. This table sets outport to
3531 * the correct output port, eth.src to the output port's MAC
3532 * address, and '[xx]reg0' to the next-hop IP address (leaving
3533 * 'ip[46].dst', the packet’s final destination, unchanged), and
3534 * advances to the next table for ARP/ND resolution. */
9975d7be 3535 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3536 if (!op->nbrp) {
9975d7be
BP
3537 continue;
3538 }
3539
4685e523
JP
3540 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3541 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
3542 op->lrp_networks.ipv4_addrs[i].network_s,
3543 op->lrp_networks.ipv4_addrs[i].plen, NULL);
3544 }
6fdb7cd6
JP
3545
3546 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3547 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
3548 op->lrp_networks.ipv6_addrs[i].network_s,
3549 op->lrp_networks.ipv6_addrs[i].plen, NULL);
3550 }
9975d7be 3551 }
4685e523 3552
6fdb7cd6 3553 /* Convert the static routes to flows. */
9975d7be
BP
3554 HMAP_FOR_EACH (od, key_node, datapaths) {
3555 if (!od->nbr) {
3556 continue;
3557 }
3558
28dc3fe9
SR
3559 for (int i = 0; i < od->nbr->n_static_routes; i++) {
3560 const struct nbrec_logical_router_static_route *route;
3561
3562 route = od->nbr->static_routes[i];
3563 build_static_route_flow(lflows, od, ports, route);
3564 }
9975d7be 3565 }
6fdb7cd6 3566
9975d7be
BP
3567 /* XXX destination unreachable */
3568
94300e09 3569 /* Local router ingress table 5: ARP Resolution.
9975d7be
BP
3570 *
3571 * Any packet that reaches this table is an IP packet whose next-hop IP
3572 * address is in reg0. (ip4.dst is the final destination.) This table
3573 * resolves the IP address in reg0 into an output port in outport and an
3574 * Ethernet address in eth.dst. */
3575 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3576 if (op->nbrp) {
6fdb7cd6
JP
3577 /* This is a logical router port. If next-hop IP address in
3578 * '[xx]reg0' matches IP address of this router port, then
3579 * the packet is intended to eventually be sent to this
3580 * logical port. Set the destination mac address using this
3581 * port's mac address.
509afdc3
GS
3582 *
3583 * The packet is still in peer's logical pipeline. So the match
3584 * should be on peer's outport. */
6fdb7cd6
JP
3585 if (op->peer && op->nbrp->peer) {
3586 if (op->lrp_networks.n_ipv4_addrs) {
3587 ds_clear(&match);
3588 ds_put_format(&match, "outport == %s && reg0 == ",
3589 op->peer->json_key);
3590 op_put_v4_networks(&match, op, false);
3591
3592 ds_clear(&actions);
3593 ds_put_format(&actions, "eth.dst = %s; next;",
3594 op->lrp_networks.ea_s);
3595 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
3596 100, ds_cstr(&match), ds_cstr(&actions));
3597 }
4685e523 3598
6fdb7cd6
JP
3599 if (op->lrp_networks.n_ipv6_addrs) {
3600 ds_clear(&match);
3601 ds_put_format(&match, "outport == %s && xxreg0 == ",
3602 op->peer->json_key);
3603 op_put_v6_networks(&match, op);
3604
3605 ds_clear(&actions);
3606 ds_put_format(&actions, "eth.dst = %s; next;",
3607 op->lrp_networks.ea_s);
3608 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
3609 100, ds_cstr(&match), ds_cstr(&actions));
3610 }
509afdc3 3611 }
0ee00741 3612 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
3613 /* This is a logical switch port that backs a VM or a container.
3614 * Extract its addresses. For each of the address, go through all
3615 * the router ports attached to the switch (to which this port
3616 * connects) and if the address in question is reachable from the
6fdb7cd6 3617 * router port, add an ARP/ND entry in that router's pipeline. */
75cf9d2b 3618
e93b43d6 3619 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 3620 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 3621 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 3622 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 3623 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
3624 /* Get the Logical_Router_Port that the
3625 * Logical_Switch_Port is connected to, as
3626 * 'peer'. */
86e98048 3627 const char *peer_name = smap_get(
0ee00741 3628 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
3629 "router-port");
3630 if (!peer_name) {
3631 continue;
3632 }
3633
e93b43d6 3634 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 3635 if (!peer || !peer->nbrp) {
86e98048
BP
3636 continue;
3637 }
3638
4685e523 3639 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
3640 continue;
3641 }
3642
09b39248 3643 ds_clear(&match);
e93b43d6 3644 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
3645 peer->json_key, ip_s);
3646
09b39248 3647 ds_clear(&actions);
4685e523 3648 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 3649 ovn_lflow_add(lflows, peer->od,
09b39248
JP
3650 S_ROUTER_IN_ARP_RESOLVE, 100,
3651 ds_cstr(&match), ds_cstr(&actions));
86e98048 3652 }
9975d7be 3653 }
6fdb7cd6
JP
3654
3655 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
3656 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
3657 for (size_t k = 0; k < op->od->n_router_ports; k++) {
3658 /* Get the Logical_Router_Port that the
3659 * Logical_Switch_Port is connected to, as
3660 * 'peer'. */
3661 const char *peer_name = smap_get(
3662 &op->od->router_ports[k]->nbsp->options,
3663 "router-port");
3664 if (!peer_name) {
3665 continue;
3666 }
3667
3668 struct ovn_port *peer = ovn_port_find(ports, peer_name);
3669 if (!peer || !peer->nbrp) {
3670 continue;
3671 }
3672
3673 if (!find_lrp_member_ip(peer, ip_s)) {
3674 continue;
3675 }
3676
3677 ds_clear(&match);
3678 ds_put_format(&match, "outport == %s && xxreg0 == %s",
3679 peer->json_key, ip_s);
3680
3681 ds_clear(&actions);
3682 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
3683 ovn_lflow_add(lflows, peer->od,
3684 S_ROUTER_IN_ARP_RESOLVE, 100,
3685 ds_cstr(&match), ds_cstr(&actions));
3686 }
3687 }
9975d7be 3688 }
0ee00741 3689 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
3690 /* This is a logical switch port that connects to a router. */
3691
3692 /* The peer of this switch port is the router port for which
3693 * we need to add logical flows such that it can resolve
3694 * ARP entries for all the other router ports connected to
3695 * the switch in question. */
3696
0ee00741 3697 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
3698 "router-port");
3699 if (!peer_name) {
3700 continue;
3701 }
3702
3703 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 3704 if (!peer || !peer->nbrp) {
75cf9d2b
GS
3705 continue;
3706 }
3707
4685e523 3708 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 3709 const char *router_port_name = smap_get(
0ee00741 3710 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
3711 "router-port");
3712 struct ovn_port *router_port = ovn_port_find(ports,
3713 router_port_name);
0ee00741 3714 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
3715 continue;
3716 }
3717
3718 /* Skip the router port under consideration. */
3719 if (router_port == peer) {
3720 continue;
3721 }
3722
6fdb7cd6
JP
3723 if (router_port->lrp_networks.n_ipv4_addrs) {
3724 ds_clear(&match);
3725 ds_put_format(&match, "outport == %s && reg0 == ",
3726 peer->json_key);
3727 op_put_v4_networks(&match, router_port, false);
3728
3729 ds_clear(&actions);
3730 ds_put_format(&actions, "eth.dst = %s; next;",
3731 router_port->lrp_networks.ea_s);
3732 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
3733 100, ds_cstr(&match), ds_cstr(&actions));
3734 }
4685e523 3735
6fdb7cd6
JP
3736 if (router_port->lrp_networks.n_ipv6_addrs) {
3737 ds_clear(&match);
3738 ds_put_format(&match, "outport == %s && xxreg0 == ",
3739 peer->json_key);
3740 op_put_v6_networks(&match, router_port);
3741
3742 ds_clear(&actions);
3743 ds_put_format(&actions, "eth.dst = %s; next;",
3744 router_port->lrp_networks.ea_s);
3745 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
3746 100, ds_cstr(&match), ds_cstr(&actions));
3747 }
75cf9d2b 3748 }
9975d7be
BP
3749 }
3750 }
75cf9d2b 3751
0bac7164
BP
3752 HMAP_FOR_EACH (od, key_node, datapaths) {
3753 if (!od->nbr) {
3754 continue;
3755 }
3756
3757 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
3758 "get_arp(outport, reg0); next;");
c34a87b6
JP
3759 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
3760 "get_arp(outport, reg0); next;");
3761
3762 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
3763 "get_nd(outport, xxreg0); next;");
0bac7164
BP
3764 }
3765
94300e09 3766 /* Local router ingress table 6: ARP request.
0bac7164
BP
3767 *
3768 * In the common case where the Ethernet destination has been resolved,
94300e09
JP
3769 * this table outputs the packet (priority 0). Otherwise, it composes
3770 * and sends an ARP request (priority 100). */
0bac7164
BP
3771 HMAP_FOR_EACH (od, key_node, datapaths) {
3772 if (!od->nbr) {
3773 continue;
3774 }
3775
3776 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
3777 "eth.dst == 00:00:00:00:00:00",
3778 "arp { "
3779 "eth.dst = ff:ff:ff:ff:ff:ff; "
3780 "arp.spa = reg1; "
3781 "arp.op = 1; " /* ARP request */
3782 "output; "
3783 "};");
3784 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
3785 }
9975d7be 3786
de297547 3787 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
3788 *
3789 * Priority 100 rules deliver packets to enabled logical ports. */
3790 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3791 if (!op->nbrp) {
9975d7be
BP
3792 continue;
3793 }
3794
0ee00741 3795 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
3796 /* Drop packets to disabled logical ports (since logical flow
3797 * tables are default-drop). */
3798 continue;
3799 }
3800
09b39248
JP
3801 ds_clear(&match);
3802 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 3803 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 3804 ds_cstr(&match), "output;");
9975d7be 3805 }
09b39248
JP
3806
3807 ds_destroy(&match);
3808 ds_destroy(&actions);
9975d7be
BP
3809}
3810
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database.
 *
 * Reconciliation strategy for both tables: compute the desired rows into a
 * local hmap, then walk the existing southbound rows, keeping those that
 * match a desired row (and removing the match from the hmap) and deleting
 * the rest; whatever remains in the hmap is genuinely new and is inserted. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    /* Compute the desired logical flows (and, for switches, multicast
     * groups) from the northbound contents. */
    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            /* The flow's datapath no longer exists; prune the flow. */
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            /* Existing row already matches a desired flow: keep the row and
             * drop the desired flow so it is not re-inserted below. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }

    /* Insert the flows that had no existing southbound row. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Record a human-readable stage name in external_ids to aid
         * debugging. */
        const struct smap ids = SMAP_CONST1(&ids, "stage-name",
                                            ovn_stage_to_str(lflow->stage));
        sbrec_logical_flow_set_external_ids(sbflow, &ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Keep the row but refresh its ports, which may have changed. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
ea382567
RB
3898
3899/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
3900 * We always update OVN_Southbound to match the current data in
3901 * OVN_Northbound, so that the address sets used in Logical_Flows in
3902 * OVN_Southbound is checked against the proper set.*/
3903static void
3904sync_address_sets(struct northd_context *ctx)
3905{
3906 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
3907
3908 const struct sbrec_address_set *sb_address_set;
3909 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
3910 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
3911 }
3912
3913 const struct nbrec_address_set *nb_address_set;
3914 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
3915 sb_address_set = shash_find_and_delete(&sb_address_sets,
3916 nb_address_set->name);
3917 if (!sb_address_set) {
3918 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
3919 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
3920 }
3921
3922 sbrec_address_set_set_addresses(sb_address_set,
3923 /* "char **" is not compatible with "const char **" */
3924 (const char **) nb_address_set->addresses,
3925 nb_address_set->n_addresses);
3926 }
3927
3928 struct shash_node *node, *next;
3929 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
3930 sbrec_address_set_delete(node->data);
3931 shash_delete(&sb_address_sets, node);
3932 }
3933 shash_destroy(&sb_address_sets);
3934}
5868eb24 3935\f
4edcdcf4 3936static void
fa183acc 3937ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
4edcdcf4 3938{
fa183acc 3939 if (!ctx->ovnsb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnnb_idl)) {
331e7aef
NS
3940 return;
3941 }
5868eb24
BP
3942 struct hmap datapaths, ports;
3943 build_datapaths(ctx, &datapaths);
3944 build_ports(ctx, &datapaths, &ports);
8639f9be 3945 build_ipam(ctx, &datapaths, &ports);
5868eb24
BP
3946 build_lflows(ctx, &datapaths, &ports);
3947
ea382567
RB
3948 sync_address_sets(ctx);
3949
5868eb24
BP
3950 struct ovn_datapath *dp, *next_dp;
3951 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
3952 ovn_datapath_destroy(&datapaths, dp);
3953 }
3954 hmap_destroy(&datapaths);
3955
3956 struct ovn_port *port, *next_port;
3957 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
3958 ovn_port_destroy(&ports, port);
3959 }
3960 hmap_destroy(&ports);
fa183acc
BP
3961
3962 /* Copy nb_cfg from northbound to southbound database.
3963 *
3964 * Also set up to update sb_cfg once our southbound transaction commits. */
3965 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
3966 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
3967 if (nb && sb) {
3968 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
3969 sb_loop->next_cfg = nb->nb_cfg;
3970 }
8639f9be
ND
3971
3972 cleanup_macam(&macam);
ac0630a2
RB
3973}
3974
fa183acc
BP
3975/* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
3976 * this column is not empty, it means we need to set the corresponding logical
3977 * port as 'up' in the northbound DB. */
ac0630a2 3978static void
fa183acc 3979update_logical_port_status(struct northd_context *ctx)
ac0630a2 3980{
fc3113bc 3981 struct hmap lports_hmap;
5868eb24 3982 const struct sbrec_port_binding *sb;
0ee00741 3983 const struct nbrec_logical_switch_port *nbsp;
fc3113bc
RB
3984
3985 struct lport_hash_node {
3986 struct hmap_node node;
0ee00741 3987 const struct nbrec_logical_switch_port *nbsp;
4ec3d7c7 3988 } *hash_node;
f93818dd 3989
fc3113bc 3990 hmap_init(&lports_hmap);
f93818dd 3991
0ee00741 3992 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
fc3113bc 3993 hash_node = xzalloc(sizeof *hash_node);
0ee00741
HK
3994 hash_node->nbsp = nbsp;
3995 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
fc3113bc
RB
3996 }
3997
5868eb24 3998 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
0ee00741 3999 nbsp = NULL;
fc3113bc 4000 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
4001 hash_string(sb->logical_port, 0),
4002 &lports_hmap) {
0ee00741
HK
4003 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
4004 nbsp = hash_node->nbsp;
fc3113bc
RB
4005 break;
4006 }
f93818dd
RB
4007 }
4008
0ee00741 4009 if (!nbsp) {
dcda6e0d 4010 /* The logical port doesn't exist for this port binding. This can
2e2762d4 4011 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 4012 * around to pruning the Port_Binding yet. */
f93818dd
RB
4013 continue;
4014 }
4015
0ee00741 4016 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
f93818dd 4017 bool up = true;
0ee00741
HK
4018 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
4019 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
f93818dd 4020 bool up = false;
0ee00741 4021 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
f93818dd
RB
4022 }
4023 }
fc3113bc 4024
4ec3d7c7 4025 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
4026 free(hash_node);
4027 }
4028 hmap_destroy(&lports_hmap);
ac0630a2 4029}
45f98d4c 4030
281977f7
NS
/* DHCPv4 options that ovn-northd supports.  These are mirrored into the
 * southbound DHCP_Options table (see
 * check_and_add_supported_dhcp_opts_to_sb_db()).  Each DHCP_OPT_* entry
 * comes from ovn/lib/ovn-dhcp.h and presumably expands to a
 * (name, code, type) initializer for struct dhcp_opts_map — confirm against
 * that header. */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
4057
33ac3c83
NS
/* DHCPv6 options that ovn-northd supports, mirrored into the southbound
 * DHCPv6_Options table (see
 * check_and_add_supported_dhcpv6_opts_to_sb_db()). */
static struct dhcp_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
4064
281977f7
NS
4065static void
4066check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
4067{
4068 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
4069 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
4070 sizeof(supported_dhcp_opts[0])); i++) {
4071 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
4072 dhcp_opt_hash(supported_dhcp_opts[i].name));
4073 }
4074
4075 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
4076 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4077 struct dhcp_opts_map *dhcp_opt =
4078 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
4079 if (dhcp_opt) {
4080 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
4081 } else {
4082 sbrec_dhcp_options_delete(opt_row);
4083 }
4084 }
4085
4086 struct dhcp_opts_map *opt;
4087 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
4088 struct sbrec_dhcp_options *sbrec_dhcp_option =
4089 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
4090 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
4091 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
4092 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
4093 }
4094
4095 hmap_destroy(&dhcp_opts_to_add);
4096}
4097
33ac3c83
NS
4098static void
4099check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
4100{
4101 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
4102 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
4103 sizeof(supported_dhcpv6_opts[0])); i++) {
4104 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
4105 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
4106 }
4107
4108 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
4109 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4110 struct dhcp_opts_map *dhcp_opt =
4111 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
4112 if (dhcp_opt) {
4113 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
4114 } else {
4115 sbrec_dhcpv6_options_delete(opt_row);
4116 }
4117 }
4118
4119 struct dhcp_opts_map *opt;
4120 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
4121 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
4122 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
4123 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
4124 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
4125 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
4126 }
4127
4128 hmap_destroy(&dhcpv6_opts_to_add);
4129}
4130
fa183acc
BP
4131/* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
4132static void
4133update_northbound_cfg(struct northd_context *ctx,
4134 struct ovsdb_idl_loop *sb_loop)
4135{
4136 /* Update northbound sb_cfg if appropriate. */
4137 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
4138 int64_t sb_cfg = sb_loop->cur_cfg;
4139 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
4140 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
4141 }
4142
4143 /* Update northbound hv_cfg if appropriate. */
4144 if (nbg) {
4145 /* Find minimum nb_cfg among all chassis. */
4146 const struct sbrec_chassis *chassis;
4147 int64_t hv_cfg = nbg->nb_cfg;
4148 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
4149 if (chassis->nb_cfg < hv_cfg) {
4150 hv_cfg = chassis->nb_cfg;
4151 }
4152 }
4153
4154 /* Update hv_cfg. */
4155 if (nbg->hv_cfg != hv_cfg) {
4156 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
4157 }
4158 }
4159}
4160
4161/* Handle a fairly small set of changes in the southbound database. */
4162static void
4163ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
4164{
4165 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
4166 return;
4167 }
4168
4169 update_logical_port_status(ctx);
4170 update_northbound_cfg(ctx, sb_loop);
4171}
4172\f
ac0630a2
RB
/* Parses the command line, filling in the global ovnnb_db / ovnsb_db
 * connection strings and handling the shared daemon, vlog, and SSL option
 * groups.  Exits directly for --help, --options, and --version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        /* Expand to the enum constants used by the daemon/vlog option
         * handler macros below. */
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    /* Derived short-option string; owned by us and freed below. */
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Case labels for the shared daemon/vlog/SSL options. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the default database locations when not specified. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
4241
5868eb24
BP
/* Monitors 'column' in 'idl' but suppresses change alerts for it.  Used for
 * columns that ovn-northd itself writes, so its own updates do not wake it
 * up again. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
4249
ac0630a2
RB
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    /* Process setup: signals, naming, Windows service support, CLI. */
    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    /* "ovs-appctl exit" sets 'exiting' to break the main loop. */
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    nbrec_init();
    sbrec_init();

    /* We want to detect (almost) all changes to the ovn-nb db.
     * sb_cfg/hv_cfg are written back by ovn-northd itself, so changes to
     * them should not wake us. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db, so register
     * tables and columns explicitly.  Columns added with add_column_noalert()
     * are written by ovn-northd and do not generate alerts. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    /* 'chassis' is written by ovn-controller, so we DO want alerts for it
     * (it drives update_logical_port_status()). */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    /* 'nb_cfg' is written by chassis; it feeds update_northbound_cfg(). */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* ovsdb_idl_loop_run() processes pending DB updates and opens a
         * transaction (NULL if one is already in flight). */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Make poll_block() return immediately so we exit promptly. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    /* Orderly shutdown. */
    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9
AW
4378
4379static void
4380ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
4381 const char *argv[] OVS_UNUSED, void *exiting_)
4382{
4383 bool *exiting = exiting_;
4384 *exiting = true;
4385
4386 unixctl_command_reply(conn, NULL);
4387}