]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
json: Move from lib to include/openvswitch.
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
21#include "command-line.h"
67d9b930 22#include "daemon.h"
ac0630a2 23#include "dirs.h"
3e8a2ad1 24#include "openvswitch/dynamic-string.h"
ac0630a2 25#include "fatal-signal.h"
4edcdcf4 26#include "hash.h"
ee89ea7b
TW
27#include "openvswitch/hmap.h"
28#include "openvswitch/json.h"
bd39395f 29#include "ovn/lib/lex.h"
e3df8838
BP
30#include "ovn/lib/ovn-nb-idl.h"
31#include "ovn/lib/ovn-sb-idl.h"
218351dd 32#include "ovn/lib/ovn-util.h"
064d7f84 33#include "packets.h"
ac0630a2 34#include "poll-loop.h"
5868eb24 35#include "smap.h"
7a15be69 36#include "sset.h"
ac0630a2
RB
37#include "stream.h"
38#include "stream-ssl.h"
7b303ff9 39#include "unixctl.h"
ac0630a2 40#include "util.h"
4edcdcf4 41#include "uuid.h"
ac0630a2
RB
42#include "openvswitch/vlog.h"
43
2e2762d4 44VLOG_DEFINE_THIS_MODULE(ovn_northd);
ac0630a2 45
7b303ff9
AW
46static unixctl_cb_func ovn_northd_exit;
47
/* Handles that the northd code passes around: one IDL and one transaction
 * for each of the two databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database IDL. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database IDL. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Southbound transaction; new
                                         * southbound rows are inserted
                                         * through it. */
};
54
/* Connection strings for the OVN northbound and southbound databases.
 * NOTE(review): presumably set from --ovnnb-db and --ovnsb-db; the option
 * parsing is not visible in this part of the file. */
static const char *ovnnb_db;
static const char *ovnsb_db;

/* Return the defaults that usage() prints for the two options above. */
static const char *default_nb_db(void);
static const char *default_sb_db(void);
880fcd14
BP
60\f
61/* Pipeline stages. */
ac0630a2 62
880fcd14
BP
/* The two pipelines in an OVN logical flow table.
 *
 * The numeric values matter: OVN_STAGE_BUILD() shifts them into bit 8 of an
 * "enum ovn_stage". */
enum ovn_pipeline {
    P_IN = 0,                   /* Ingress pipeline. */
    P_OUT = 1                   /* Egress pipeline. */
};
091e3af9 68
880fcd14
BP
/* The two purposes for which ovn-northd uses OVN logical datapaths.
 *
 * The numeric values matter: OVN_STAGE_BUILD() shifts them into bit 9 of an
 * "enum ovn_stage". */
enum ovn_datapath_type {
    DP_SWITCH = 0,              /* OVN logical switch. */
    DP_ROUTER = 1               /* OVN logical router. */
};
74
880fcd14
BP
/* Packs a datapath type, a pipeline, and a table number into the integer
 * value of an "enum ovn_stage": TABLE occupies bits 0-7, PIPELINE bit 8 and
 * DP_TYPE bit 9.
 *
 * (ovn_stage_build() is the type-safe way to do this, but an inline function
 * cannot appear in an enum definition or a switch case label, so a macro is
 * needed there.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE)               \
    (((((DP_TYPE) << 1) | (PIPELINE)) << 8) | (TABLE))
81
82/* A stage within an OVN logical switch or router.
091e3af9 83 *
880fcd14
BP
84 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
85 * or router, whether the stage is part of the ingress or egress pipeline, and
86 * the table within that pipeline. The first three components are combined to
685f4dfe 87 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
880fcd14
BP
88 * S_ROUTER_OUT_DELIVERY. */
89enum ovn_stage {
e0c9e58b
JP
90#define PIPELINE_STAGES \
91 /* Logical switch ingress stages. */ \
685f4dfe
NS
92 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
93 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
94 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
95 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
7a15be69
GS
96 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
97 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
98 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
99 PIPELINE_STAGE(SWITCH, IN, LB, 7, "ls_in_lb") \
100 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 8, "ls_in_stateful") \
101 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 9, "ls_in_arp_rsp") \
102 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 10, "ls_in_l2_lkup") \
e0c9e58b
JP
103 \
104 /* Logical switch egress stages. */ \
7a15be69
GS
105 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
106 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
107 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
108 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
109 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
110 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 5, "ls_out_stateful") \
111 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 6, "ls_out_port_sec_ip") \
112 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 7, "ls_out_port_sec_l2") \
e0c9e58b
JP
113 \
114 /* Logical router ingress stages. */ \
115 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
116 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
de297547
GS
117 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \
118 PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \
119 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \
120 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \
121 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \
e0c9e58b
JP
122 \
123 /* Logical router egress stages. */ \
de297547
GS
124 PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
125 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
880fcd14
BP
126
127#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
128 S_##DP_TYPE##_##PIPELINE##_##STAGE \
129 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
130 PIPELINE_STAGES
131#undef PIPELINE_STAGE
091e3af9
JP
132};
133
6bb4a18e
JP
134/* Due to various hard-coded priorities needed to implement ACLs, the
135 * northbound database supports a smaller range of ACL priorities than
136 * are available to logical flows. This value is added to an ACL
137 * priority to determine the ACL's logical flow priority. */
138#define OVN_ACL_PRI_OFFSET 1000
139
/* Individual bits of logical register reg0, named for the conntrack-related
 * flag that each one carries.  NOTE(review): the flows that set and test
 * these bits are not visible in this part of the file. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT    "reg0[2]"
facf8652 143
880fcd14
BP
144/* Returns an "enum ovn_stage" built from the arguments. */
145static enum ovn_stage
146ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
147 uint8_t table)
148{
149 return OVN_STAGE_BUILD(dp_type, pipeline, table);
150}
151
152/* Returns the pipeline to which 'stage' belongs. */
153static enum ovn_pipeline
154ovn_stage_get_pipeline(enum ovn_stage stage)
155{
156 return (stage >> 8) & 1;
157}
158
159/* Returns the table to which 'stage' belongs. */
160static uint8_t
161ovn_stage_get_table(enum ovn_stage stage)
162{
163 return stage & 0xff;
164}
165
166/* Returns a string name for 'stage'. */
167static const char *
168ovn_stage_to_str(enum ovn_stage stage)
169{
170 switch (stage) {
171#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
172 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
173 PIPELINE_STAGES
174#undef PIPELINE_STAGE
175 default: return "<unknown>";
176 }
177}
9a9961d2
BP
178
179/* Returns the type of the datapath to which a flow with the given 'stage' may
180 * be added. */
181static enum ovn_datapath_type
182ovn_stage_to_datapath_type(enum ovn_stage stage)
183{
184 switch (stage) {
185#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
186 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
187 PIPELINE_STAGES
188#undef PIPELINE_STAGE
189 default: OVS_NOT_REACHED();
190 }
191}
880fcd14 192\f
ac0630a2
RB
193static void
194usage(void)
195{
196 printf("\
197%s: OVN northbound management daemon\n\
198usage: %s [OPTIONS]\n\
199\n\
200Options:\n\
201 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
202 (default: %s)\n\
ec78987f 203 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
ac0630a2
RB
204 (default: %s)\n\
205 -h, --help display this help message\n\
206 -o, --options list available options\n\
207 -V, --version display version information\n\
60bdd011 208", program_name, program_name, default_nb_db(), default_sb_db());
67d9b930 209 daemon_usage();
ac0630a2
RB
210 vlog_usage();
211 stream_usage("database", true, true, false);
212}
213\f
5868eb24
BP
214struct tnlid_node {
215 struct hmap_node hmap_node;
216 uint32_t tnlid;
217};
218
219static void
220destroy_tnlids(struct hmap *tnlids)
4edcdcf4 221{
4ec3d7c7
DDP
222 struct tnlid_node *node;
223 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
224 free(node);
225 }
226 hmap_destroy(tnlids);
227}
228
229static void
230add_tnlid(struct hmap *set, uint32_t tnlid)
231{
232 struct tnlid_node *node = xmalloc(sizeof *node);
233 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
234 node->tnlid = tnlid;
4edcdcf4
RB
235}
236
4edcdcf4 237static bool
5868eb24 238tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 239{
5868eb24
BP
240 const struct tnlid_node *node;
241 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
242 if (node->tnlid == tnlid) {
243 return true;
244 }
245 }
246 return false;
247}
4edcdcf4 248
5868eb24
BP
249static uint32_t
250allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
251 uint32_t *hint)
252{
253 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
254 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
255 if (!tnlid_in_use(set, tnlid)) {
256 add_tnlid(set, tnlid);
257 *hint = tnlid;
258 return tnlid;
259 }
4edcdcf4
RB
260 }
261
5868eb24
BP
262 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
263 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
264 return 0;
265}
266\f
9975d7be
BP
267/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
268 * sb->external_ids:logical-switch. */
5868eb24
BP
269struct ovn_datapath {
270 struct hmap_node key_node; /* Index on 'key'. */
9975d7be 271 struct uuid key; /* (nbs/nbr)->header_.uuid. */
4edcdcf4 272
9975d7be
BP
273 const struct nbrec_logical_switch *nbs; /* May be NULL. */
274 const struct nbrec_logical_router *nbr; /* May be NULL. */
5868eb24 275 const struct sbrec_datapath_binding *sb; /* May be NULL. */
4edcdcf4 276
5868eb24 277 struct ovs_list list; /* In list of similar records. */
4edcdcf4 278
9975d7be 279 /* Logical switch data. */
86e98048
BP
280 struct ovn_port **router_ports;
281 size_t n_router_ports;
9975d7be 282
5868eb24
BP
283 struct hmap port_tnlids;
284 uint32_t port_key_hint;
285
286 bool has_unknown;
287};
288
289static struct ovn_datapath *
290ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
9975d7be
BP
291 const struct nbrec_logical_switch *nbs,
292 const struct nbrec_logical_router *nbr,
5868eb24
BP
293 const struct sbrec_datapath_binding *sb)
294{
295 struct ovn_datapath *od = xzalloc(sizeof *od);
296 od->key = *key;
297 od->sb = sb;
9975d7be
BP
298 od->nbs = nbs;
299 od->nbr = nbr;
5868eb24
BP
300 hmap_init(&od->port_tnlids);
301 od->port_key_hint = 0;
302 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
303 return od;
304}
305
306static void
307ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
308{
309 if (od) {
310 /* Don't remove od->list. It is used within build_datapaths() as a
311 * private list and once we've exited that function it is not safe to
312 * use it. */
313 hmap_remove(datapaths, &od->key_node);
314 destroy_tnlids(&od->port_tnlids);
86e98048 315 free(od->router_ports);
5868eb24
BP
316 free(od);
317 }
318}
319
9a9961d2
BP
320/* Returns 'od''s datapath type. */
321static enum ovn_datapath_type
322ovn_datapath_get_type(const struct ovn_datapath *od)
323{
324 return od->nbs ? DP_SWITCH : DP_ROUTER;
325}
326
5868eb24
BP
327static struct ovn_datapath *
328ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
329{
330 struct ovn_datapath *od;
331
332 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
333 if (uuid_equals(uuid, &od->key)) {
334 return od;
335 }
336 }
337 return NULL;
338}
339
340static struct ovn_datapath *
341ovn_datapath_from_sbrec(struct hmap *datapaths,
342 const struct sbrec_datapath_binding *sb)
343{
344 struct uuid key;
345
9975d7be
BP
346 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
347 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
348 return NULL;
349 }
350 return ovn_datapath_find(datapaths, &key);
351}
352
5412db30
J
353static bool
354lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
355{
356 return !lrouter->enabled || *lrouter->enabled;
357}
358
5868eb24
BP
359static void
360join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
361 struct ovs_list *sb_only, struct ovs_list *nb_only,
362 struct ovs_list *both)
363{
364 hmap_init(datapaths);
417e7e66
BW
365 ovs_list_init(sb_only);
366 ovs_list_init(nb_only);
367 ovs_list_init(both);
5868eb24
BP
368
369 const struct sbrec_datapath_binding *sb, *sb_next;
370 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
371 struct uuid key;
9975d7be
BP
372 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
373 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
374 ovsdb_idl_txn_add_comment(
375 ctx->ovnsb_txn,
376 "deleting Datapath_Binding "UUID_FMT" that lacks "
377 "external-ids:logical-switch and "
378 "external-ids:logical-router",
379 UUID_ARGS(&sb->header_.uuid));
5868eb24
BP
380 sbrec_datapath_binding_delete(sb);
381 continue;
382 }
383
384 if (ovn_datapath_find(datapaths, &key)) {
385 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
9975d7be
BP
386 VLOG_INFO_RL(
387 &rl, "deleting Datapath_Binding "UUID_FMT" with "
388 "duplicate external-ids:logical-switch/router "UUID_FMT,
389 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
5868eb24
BP
390 sbrec_datapath_binding_delete(sb);
391 continue;
392 }
393
394 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
9975d7be 395 NULL, NULL, sb);
417e7e66 396 ovs_list_push_back(sb_only, &od->list);
5868eb24
BP
397 }
398
9975d7be
BP
399 const struct nbrec_logical_switch *nbs;
400 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
5868eb24 401 struct ovn_datapath *od = ovn_datapath_find(datapaths,
9975d7be 402 &nbs->header_.uuid);
5868eb24 403 if (od) {
9975d7be 404 od->nbs = nbs;
417e7e66
BW
405 ovs_list_remove(&od->list);
406 ovs_list_push_back(both, &od->list);
5868eb24 407 } else {
9975d7be
BP
408 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
409 nbs, NULL, NULL);
417e7e66 410 ovs_list_push_back(nb_only, &od->list);
5868eb24
BP
411 }
412 }
9975d7be
BP
413
414 const struct nbrec_logical_router *nbr;
415 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
5412db30
J
416 if (!lrouter_is_enabled(nbr)) {
417 continue;
418 }
419
9975d7be
BP
420 struct ovn_datapath *od = ovn_datapath_find(datapaths,
421 &nbr->header_.uuid);
422 if (od) {
423 if (!od->nbs) {
424 od->nbr = nbr;
417e7e66
BW
425 ovs_list_remove(&od->list);
426 ovs_list_push_back(both, &od->list);
9975d7be
BP
427 } else {
428 /* Can't happen! */
429 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
430 VLOG_WARN_RL(&rl,
431 "duplicate UUID "UUID_FMT" in OVN_Northbound",
432 UUID_ARGS(&nbr->header_.uuid));
433 continue;
434 }
435 } else {
436 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
437 NULL, nbr, NULL);
417e7e66 438 ovs_list_push_back(nb_only, &od->list);
9975d7be 439 }
9975d7be 440 }
5868eb24
BP
441}
442
/* Allocates a datapath tunnel key that is not in 'dp_tnlids', adds it to
 * 'dp_tnlids', and returns it.  Returns 0 if no key is available.
 *
 * The search position is kept in a function-local static, so it persists
 * across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    /* Datapath tunnel keys are limited to 24 bits. */
    const uint32_t max_key = (1u << 24) - 1;
    static uint32_t last_key;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &last_key);
}
449
0bac7164
BP
450/* Updates the southbound Datapath_Binding table so that it contains the
451 * logical switches and routers specified by the northbound database.
452 *
453 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
454 * switch and router. */
5868eb24
BP
455static void
456build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
457{
458 struct ovs_list sb_only, nb_only, both;
459
460 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
461
417e7e66 462 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
463 /* First index the in-use datapath tunnel IDs. */
464 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
465 struct ovn_datapath *od;
466 LIST_FOR_EACH (od, list, &both) {
467 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
468 }
469
470 /* Add southbound record for each unmatched northbound record. */
471 LIST_FOR_EACH (od, list, &nb_only) {
472 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
473 if (!tunnel_key) {
474 break;
475 }
476
477 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
478
5868eb24 479 char uuid_s[UUID_LEN + 1];
9975d7be
BP
480 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
481 const char *key = od->nbs ? "logical-switch" : "logical-router";
482 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
aaf881c6 483 sbrec_datapath_binding_set_external_ids(od->sb, &id);
5868eb24
BP
484
485 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
486 }
487 destroy_tnlids(&dp_tnlids);
488 }
489
490 /* Delete southbound records without northbound matches. */
491 struct ovn_datapath *od, *next;
492 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 493 ovs_list_remove(&od->list);
5868eb24
BP
494 sbrec_datapath_binding_delete(od->sb);
495 ovn_datapath_destroy(datapaths, od);
496 }
497}
498\f
499struct ovn_port {
500 struct hmap_node key_node; /* Index on 'key'. */
9975d7be
BP
501 char *key; /* nbs->name, nbr->name, sb->logical_port. */
502 char *json_key; /* 'key', quoted for use in JSON. */
5868eb24 503
9975d7be
BP
504 const struct sbrec_port_binding *sb; /* May be NULL. */
505
e93b43d6 506 /* Logical switch port data. */
0ee00741 507 const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */
e93b43d6
JP
508
509 struct lport_addresses *lsp_addrs; /* Logical switch port addresses. */
510 unsigned int n_lsp_addrs;
511
512 struct lport_addresses *ps_addrs; /* Port security addresses. */
513 unsigned int n_ps_addrs;
514
9975d7be 515 /* Logical router port data. */
0ee00741 516 const struct nbrec_logical_router_port *nbrp; /* May be NULL. */
e93b43d6 517
4685e523 518 struct lport_addresses lrp_networks;
c9bdf7bd 519
ad386c3f
BP
520 /* The port's peer:
521 *
522 * - A switch port S of type "router" has a router port R as a peer,
523 * and R in turn has S has its peer.
524 *
525 * - Two connected logical router ports have each other as peer. */
9975d7be 526 struct ovn_port *peer;
5868eb24
BP
527
528 struct ovn_datapath *od;
529
530 struct ovs_list list; /* In list of similar records. */
531};
532
533static struct ovn_port *
534ovn_port_create(struct hmap *ports, const char *key,
0ee00741
HK
535 const struct nbrec_logical_switch_port *nbsp,
536 const struct nbrec_logical_router_port *nbrp,
5868eb24
BP
537 const struct sbrec_port_binding *sb)
538{
539 struct ovn_port *op = xzalloc(sizeof *op);
9975d7be
BP
540
541 struct ds json_key = DS_EMPTY_INITIALIZER;
542 json_string_escape(key, &json_key);
543 op->json_key = ds_steal_cstr(&json_key);
544
545 op->key = xstrdup(key);
5868eb24 546 op->sb = sb;
0ee00741
HK
547 op->nbsp = nbsp;
548 op->nbrp = nbrp;
5868eb24
BP
549 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
550 return op;
551}
552
553static void
554ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
555{
556 if (port) {
557 /* Don't remove port->list. It is used within build_ports() as a
558 * private list and once we've exited that function it is not safe to
559 * use it. */
560 hmap_remove(ports, &port->key_node);
e93b43d6
JP
561
562 for (int i = 0; i < port->n_lsp_addrs; i++) {
563 destroy_lport_addresses(&port->lsp_addrs[i]);
564 }
565 free(port->lsp_addrs);
566
567 for (int i = 0; i < port->n_ps_addrs; i++) {
568 destroy_lport_addresses(&port->ps_addrs[i]);
569 }
570 free(port->ps_addrs);
571
4685e523 572 destroy_lport_addresses(&port->lrp_networks);
9975d7be
BP
573 free(port->json_key);
574 free(port->key);
5868eb24
BP
575 free(port);
576 }
577}
578
579static struct ovn_port *
580ovn_port_find(struct hmap *ports, const char *name)
581{
582 struct ovn_port *op;
583
584 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
585 if (!strcmp(op->key, name)) {
586 return op;
587 }
588 }
589 return NULL;
590}
591
592static uint32_t
593ovn_port_allocate_key(struct ovn_datapath *od)
594{
595 return allocate_tnlid(&od->port_tnlids, "port",
596 (1u << 15) - 1, &od->port_key_hint);
597}
598
599static void
600join_logical_ports(struct northd_context *ctx,
601 struct hmap *datapaths, struct hmap *ports,
602 struct ovs_list *sb_only, struct ovs_list *nb_only,
603 struct ovs_list *both)
604{
605 hmap_init(ports);
417e7e66
BW
606 ovs_list_init(sb_only);
607 ovs_list_init(nb_only);
608 ovs_list_init(both);
5868eb24
BP
609
610 const struct sbrec_port_binding *sb;
611 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
612 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
9975d7be 613 NULL, NULL, sb);
417e7e66 614 ovs_list_push_back(sb_only, &op->list);
5868eb24
BP
615 }
616
617 struct ovn_datapath *od;
618 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
619 if (od->nbs) {
620 for (size_t i = 0; i < od->nbs->n_ports; i++) {
0ee00741
HK
621 const struct nbrec_logical_switch_port *nbsp
622 = od->nbs->ports[i];
623 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
9975d7be 624 if (op) {
0ee00741 625 if (op->nbsp || op->nbrp) {
9975d7be
BP
626 static struct vlog_rate_limit rl
627 = VLOG_RATE_LIMIT_INIT(5, 1);
628 VLOG_WARN_RL(&rl, "duplicate logical port %s",
0ee00741 629 nbsp->name);
9975d7be
BP
630 continue;
631 }
0ee00741 632 op->nbsp = nbsp;
417e7e66
BW
633 ovs_list_remove(&op->list);
634 ovs_list_push_back(both, &op->list);
e93b43d6
JP
635
636 /* This port exists due to a SB binding, but should
637 * not have been initialized fully. */
638 ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
9975d7be 639 } else {
0ee00741 640 op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
417e7e66 641 ovs_list_push_back(nb_only, &op->list);
9975d7be
BP
642 }
643
e93b43d6 644 op->lsp_addrs
0ee00741
HK
645 = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
646 for (size_t j = 0; j < nbsp->n_addresses; j++) {
647 if (!strcmp(nbsp->addresses[j], "unknown")) {
e93b43d6
JP
648 continue;
649 }
0ee00741 650 if (!extract_lsp_addresses(nbsp->addresses[j],
e93b43d6
JP
651 &op->lsp_addrs[op->n_lsp_addrs])) {
652 static struct vlog_rate_limit rl
653 = VLOG_RATE_LIMIT_INIT(1, 1);
654 VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
655 "switch port addresses. No MAC "
656 "address found",
0ee00741 657 op->nbsp->addresses[j]);
e93b43d6
JP
658 continue;
659 }
660 op->n_lsp_addrs++;
661 }
662
663 op->ps_addrs
0ee00741
HK
664 = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
665 for (size_t j = 0; j < nbsp->n_port_security; j++) {
666 if (!extract_lsp_addresses(nbsp->port_security[j],
e93b43d6
JP
667 &op->ps_addrs[op->n_ps_addrs])) {
668 static struct vlog_rate_limit rl
669 = VLOG_RATE_LIMIT_INIT(1, 1);
670 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
671 "security. No MAC address found",
0ee00741 672 op->nbsp->port_security[j]);
e93b43d6
JP
673 continue;
674 }
675 op->n_ps_addrs++;
676 }
677
9975d7be
BP
678 op->od = od;
679 }
680 } else {
681 for (size_t i = 0; i < od->nbr->n_ports; i++) {
0ee00741
HK
682 const struct nbrec_logical_router_port *nbrp
683 = od->nbr->ports[i];
9975d7be 684
4685e523 685 struct lport_addresses lrp_networks;
0ee00741 686 if (!extract_lrp_networks(nbrp, &lrp_networks)) {
9975d7be
BP
687 static struct vlog_rate_limit rl
688 = VLOG_RATE_LIMIT_INIT(5, 1);
0ee00741 689 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
9975d7be
BP
690 continue;
691 }
692
4685e523 693 if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
9975d7be
BP
694 continue;
695 }
696
0ee00741 697 struct ovn_port *op = ovn_port_find(ports, nbrp->name);
9975d7be 698 if (op) {
0ee00741 699 if (op->nbsp || op->nbrp) {
9975d7be
BP
700 static struct vlog_rate_limit rl
701 = VLOG_RATE_LIMIT_INIT(5, 1);
702 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
0ee00741 703 nbrp->name);
9975d7be
BP
704 continue;
705 }
0ee00741 706 op->nbrp = nbrp;
417e7e66
BW
707 ovs_list_remove(&op->list);
708 ovs_list_push_back(both, &op->list);
4685e523
JP
709
710 /* This port exists but should not have been
711 * initialized fully. */
712 ovs_assert(!op->lrp_networks.n_ipv4_addrs
713 && !op->lrp_networks.n_ipv6_addrs);
9975d7be 714 } else {
0ee00741 715 op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
417e7e66 716 ovs_list_push_back(nb_only, &op->list);
9975d7be
BP
717 }
718
4685e523 719 op->lrp_networks = lrp_networks;
9975d7be 720 op->od = od;
5868eb24 721 }
9975d7be
BP
722 }
723 }
724
725 /* Connect logical router ports, and logical switch ports of type "router",
726 * to their peers. */
727 struct ovn_port *op;
728 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741
HK
729 if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
730 const char *peer_name = smap_get(&op->nbsp->options, "router-port");
9975d7be
BP
731 if (!peer_name) {
732 continue;
733 }
734
735 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 736 if (!peer || !peer->nbrp) {
9975d7be
BP
737 continue;
738 }
739
740 peer->peer = op;
741 op->peer = peer;
86e98048
BP
742 op->od->router_ports = xrealloc(
743 op->od->router_ports,
744 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
745 op->od->router_ports[op->od->n_router_ports++] = op;
0ee00741 746 } else if (op->nbrp && op->nbrp->peer) {
ad386c3f
BP
747 struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
748 if (peer) {
749 if (peer->nbrp) {
750 op->peer = peer;
751 } else {
752 /* An ovn_port for a switch port of type "router" does have
753 * a router port as its peer (see the case above for
754 * "router" ports), but this is set via options:router-port
755 * in Logical_Switch_Port and does not involve the
756 * Logical_Router_Port's 'peer' column. */
757 static struct vlog_rate_limit rl =
758 VLOG_RATE_LIMIT_INIT(5, 1);
759 VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
760 "port %s is a switch port", op->key);
761 }
762 }
5868eb24
BP
763 }
764 }
765}
766
767static void
768ovn_port_update_sbrec(const struct ovn_port *op)
769{
770 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
0ee00741 771 if (op->nbrp) {
c1645003
GS
772 /* If the router is for l3 gateway, it resides on a chassis
773 * and its port type is "gateway". */
774 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
775 if (chassis) {
776 sbrec_port_binding_set_type(op->sb, "gateway");
777 } else {
778 sbrec_port_binding_set_type(op->sb, "patch");
779 }
9975d7be
BP
780
781 const char *peer = op->peer ? op->peer->key : "<error>";
c1645003
GS
782 struct smap new;
783 smap_init(&new);
784 smap_add(&new, "peer", peer);
785 if (chassis) {
786 smap_add(&new, "gateway-chassis", chassis);
787 }
788 sbrec_port_binding_set_options(op->sb, &new);
789 smap_destroy(&new);
9975d7be
BP
790
791 sbrec_port_binding_set_parent_port(op->sb, NULL);
792 sbrec_port_binding_set_tag(op->sb, NULL, 0);
793 sbrec_port_binding_set_mac(op->sb, NULL, 0);
794 } else {
0ee00741
HK
795 if (strcmp(op->nbsp->type, "router")) {
796 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
797 sbrec_port_binding_set_options(op->sb, &op->nbsp->options);
9975d7be 798 } else {
c1645003
GS
799 const char *chassis = NULL;
800 if (op->peer && op->peer->od && op->peer->od->nbr) {
801 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
802 }
803
804 /* A switch port connected to a gateway router is also of
805 * type "gateway". */
806 if (chassis) {
807 sbrec_port_binding_set_type(op->sb, "gateway");
808 } else {
809 sbrec_port_binding_set_type(op->sb, "patch");
810 }
9975d7be 811
0ee00741 812 const char *router_port = smap_get(&op->nbsp->options,
9975d7be
BP
813 "router-port");
814 if (!router_port) {
815 router_port = "<error>";
816 }
c1645003
GS
817 struct smap new;
818 smap_init(&new);
819 smap_add(&new, "peer", router_port);
820 if (chassis) {
821 smap_add(&new, "gateway-chassis", chassis);
822 }
823 sbrec_port_binding_set_options(op->sb, &new);
824 smap_destroy(&new);
9975d7be 825 }
0ee00741
HK
826 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
827 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
828 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
829 op->nbsp->n_addresses);
9975d7be 830 }
5868eb24
BP
831}
832
0bac7164 833/* Updates the southbound Port_Binding table so that it contains the logical
80f408f4 834 * switch ports specified by the northbound database.
0bac7164
BP
835 *
836 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
837 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
838 * datapaths. */
5868eb24
BP
839static void
840build_ports(struct northd_context *ctx, struct hmap *datapaths,
841 struct hmap *ports)
842{
843 struct ovs_list sb_only, nb_only, both;
844
845 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
846
847 /* For logical ports that are in both databases, update the southbound
848 * record based on northbound data. Also index the in-use tunnel_keys. */
849 struct ovn_port *op, *next;
850 LIST_FOR_EACH_SAFE (op, next, list, &both) {
851 ovn_port_update_sbrec(op);
852
853 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
854 if (op->sb->tunnel_key > op->od->port_key_hint) {
855 op->od->port_key_hint = op->sb->tunnel_key;
856 }
857 }
858
859 /* Add southbound record for each unmatched northbound record. */
860 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
861 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
862 if (!tunnel_key) {
863 continue;
864 }
865
866 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
867 ovn_port_update_sbrec(op);
868
869 sbrec_port_binding_set_logical_port(op->sb, op->key);
870 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
871 }
872
873 /* Delete southbound records without northbound matches. */
874 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 875 ovs_list_remove(&op->list);
5868eb24
BP
876 sbrec_port_binding_delete(op->sb);
877 ovn_port_destroy(ports, op);
878 }
879}
880\f
/* Range of keys used for multicast groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A well-known multicast group: a name together with its fixed key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* The two predefined groups take the two highest keys in the range. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = {
    MC_FLOOD, OVN_MAX_MULTICAST
};

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = {
    MC_UNKNOWN, OVN_MAX_MULTICAST - 1
};
894
895static bool
896multicast_group_equal(const struct multicast_group *a,
897 const struct multicast_group *b)
898{
899 return !strcmp(a->name, b->name) && a->key == b->key;
900}
901
902/* Multicast group entry. */
903struct ovn_multicast {
904 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
905 struct ovn_datapath *datapath;
906 const struct multicast_group *group;
907
908 struct ovn_port **ports;
909 size_t n_ports, allocated_ports;
910};
911
912static uint32_t
913ovn_multicast_hash(const struct ovn_datapath *datapath,
914 const struct multicast_group *group)
915{
916 return hash_pointer(datapath, group->key);
917}
918
919static struct ovn_multicast *
920ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
921 const struct multicast_group *group)
922{
923 struct ovn_multicast *mc;
924
925 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
926 ovn_multicast_hash(datapath, group), mcgroups) {
927 if (mc->datapath == datapath
928 && multicast_group_equal(mc->group, group)) {
929 return mc;
4edcdcf4
RB
930 }
931 }
5868eb24
BP
932 return NULL;
933}
934
935static void
936ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
937 struct ovn_port *port)
938{
939 struct ovn_datapath *od = port->od;
940 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
941 if (!mc) {
942 mc = xmalloc(sizeof *mc);
943 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
944 mc->datapath = od;
945 mc->group = group;
946 mc->n_ports = 0;
947 mc->allocated_ports = 4;
948 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
949 }
950 if (mc->n_ports >= mc->allocated_ports) {
951 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
952 sizeof *mc->ports);
953 }
954 mc->ports[mc->n_ports++] = port;
955}
4edcdcf4 956
5868eb24
BP
957static void
958ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
959{
960 if (mc) {
961 hmap_remove(mcgroups, &mc->hmap_node);
962 free(mc->ports);
963 free(mc);
964 }
965}
4edcdcf4 966
5868eb24
BP
967static void
968ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
969 const struct sbrec_multicast_group *sb)
970{
971 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
972 for (size_t i = 0; i < mc->n_ports; i++) {
973 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
974 }
975 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
976 free(ports);
4edcdcf4 977}
bd39395f 978\f
48605550 979/* Logical flow generation.
bd39395f 980 *
48605550 981 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
982 * function of most of the northbound database.
983 */
984
5868eb24
BP
985struct ovn_lflow {
986 struct hmap_node hmap_node;
bd39395f 987
5868eb24 988 struct ovn_datapath *od;
880fcd14 989 enum ovn_stage stage;
5868eb24
BP
990 uint16_t priority;
991 char *match;
992 char *actions;
bd39395f
BP
993};
994
995static size_t
5868eb24 996ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 997{
5868eb24 998 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 999 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
1000 hash = hash_string(lflow->match, hash);
1001 return hash_string(lflow->actions, hash);
bd39395f
BP
1002}
1003
5868eb24
BP
1004static bool
1005ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1006{
1007 return (a->od == b->od
880fcd14 1008 && a->stage == b->stage
5868eb24
BP
1009 && a->priority == b->priority
1010 && !strcmp(a->match, b->match)
1011 && !strcmp(a->actions, b->actions));
1012}
1013
1014static void
1015ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
880fcd14 1016 enum ovn_stage stage, uint16_t priority,
5868eb24 1017 char *match, char *actions)
bd39395f 1018{
5868eb24 1019 lflow->od = od;
880fcd14 1020 lflow->stage = stage;
5868eb24
BP
1021 lflow->priority = priority;
1022 lflow->match = match;
1023 lflow->actions = actions;
bd39395f
BP
1024}
1025
48605550 1026/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 1027static void
5868eb24 1028ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
880fcd14 1029 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1030 const char *match, const char *actions)
1031{
9a9961d2
BP
1032 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
1033
5868eb24 1034 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 1035 ovn_lflow_init(lflow, od, stage, priority,
5868eb24
BP
1036 xstrdup(match), xstrdup(actions));
1037 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
1038}
1039
1040static struct ovn_lflow *
1041ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 1042 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1043 const char *match, const char *actions)
1044{
1045 struct ovn_lflow target;
880fcd14 1046 ovn_lflow_init(&target, od, stage, priority,
5868eb24
BP
1047 CONST_CAST(char *, match), CONST_CAST(char *, actions));
1048
1049 struct ovn_lflow *lflow;
1050 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
1051 lflows) {
1052 if (ovn_lflow_equal(lflow, &target)) {
1053 return lflow;
bd39395f
BP
1054 }
1055 }
5868eb24
BP
1056 return NULL;
1057}
bd39395f 1058
5868eb24
BP
1059static void
1060ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1061{
1062 if (lflow) {
1063 hmap_remove(lflows, &lflow->hmap_node);
1064 free(lflow->match);
1065 free(lflow->actions);
1066 free(lflow);
1067 }
bd39395f
BP
1068}
1069
bd39395f 1070/* Appends port security constraints on L2 address field 'eth_addr_field'
e93b43d6
JP
1071 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
1072 * elements, is the collection of port_security constraints from an
1073 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
bd39395f 1074static void
685f4dfe 1075build_port_security_l2(const char *eth_addr_field,
e93b43d6
JP
1076 struct lport_addresses *ps_addrs,
1077 unsigned int n_ps_addrs,
685f4dfe 1078 struct ds *match)
bd39395f 1079{
e93b43d6
JP
1080 if (!n_ps_addrs) {
1081 return;
1082 }
bd39395f 1083
e93b43d6 1084 ds_put_format(match, " && %s == {", eth_addr_field);
f7cb14cd 1085
e93b43d6
JP
1086 for (size_t i = 0; i < n_ps_addrs; i++) {
1087 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
bd39395f 1088 }
f7cb14cd 1089 ds_chomp(match, ' ');
bd39395f 1090 ds_put_cstr(match, "}");
bd39395f
BP
1091}
1092
685f4dfe
NS
1093static void
1094build_port_security_ipv6_nd_flow(
1095 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1096 int n_ipv6_addrs)
1097{
1098 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1099 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1100 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1101 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1102 ETH_ADDR_ARGS(ea));
1103 if (!n_ipv6_addrs) {
1104 ds_put_cstr(match, "))");
1105 return;
1106 }
1107
1108 char ip6_str[INET6_ADDRSTRLEN + 1];
1109 struct in6_addr lla;
1110 in6_generate_lla(ea, &lla);
1111 memset(ip6_str, 0, sizeof(ip6_str));
1112 ipv6_string_mapped(ip6_str, &lla);
1113 ds_put_format(match, " && (nd.target == %s", ip6_str);
1114
1115 for(int i = 0; i < n_ipv6_addrs; i++) {
1116 memset(ip6_str, 0, sizeof(ip6_str));
1117 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1118 ds_put_format(match, " || nd.target == %s", ip6_str);
1119 }
1120
1121 ds_put_format(match, ")))");
1122}
1123
1124static void
1125build_port_security_ipv6_flow(
1126 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1127 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1128{
1129 char ip6_str[INET6_ADDRSTRLEN + 1];
1130
1131 ds_put_format(match, " && %s == {",
1132 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1133
1134 /* Allow link-local address. */
1135 struct in6_addr lla;
1136 in6_generate_lla(ea, &lla);
1137 ipv6_string_mapped(ip6_str, &lla);
1138 ds_put_format(match, "%s, ", ip6_str);
1139
9e687b23
DL
1140 /* Allow ip6.dst=ff00::/8 for multicast packets */
1141 if (pipeline == P_OUT) {
1142 ds_put_cstr(match, "ff00::/8, ");
1143 }
685f4dfe
NS
1144 for(int i = 0; i < n_ipv6_addrs; i++) {
1145 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
9e687b23 1146 ds_put_format(match, "%s, ", ip6_str);
685f4dfe 1147 }
9e687b23
DL
1148 /* Replace ", " by "}". */
1149 ds_chomp(match, ' ');
1150 ds_chomp(match, ',');
685f4dfe
NS
1151 ds_put_cstr(match, "}");
1152}
1153
1154/**
1155 * Build port security constraints on ARP and IPv6 ND fields
1156 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1157 *
1158 * For each port security of the logical port, following
1159 * logical flows are added
1160 * - If the port security has no IP (both IPv4 and IPv6) or
1161 * if it has IPv4 address(es)
1162 * - Priority 90 flow to allow ARP packets for known MAC addresses
1163 * in the eth.src and arp.spa fields. If the port security
1164 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
1165 *
1166 * - If the port security has no IP (both IPv4 and IPv6) or
1167 * if it has IPv6 address(es)
1168 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1169 * in the eth.src and nd.sll/nd.tll fields. If the port security
1170 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1171 * for IPv6 Neighbor Advertisement packet.
1172 *
1173 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1174 */
1175static void
1176build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
1177{
e93b43d6
JP
1178 struct ds match = DS_EMPTY_INITIALIZER;
1179
1180 for (size_t i = 0; i < op->n_ps_addrs; i++) {
1181 struct lport_addresses *ps = &op->ps_addrs[i];
685f4dfe 1182
e93b43d6 1183 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
685f4dfe 1184
e93b43d6
JP
1185 ds_clear(&match);
1186 if (ps->n_ipv4_addrs || no_ip) {
1187 ds_put_format(&match,
1188 "inport == %s && eth.src == %s && arp.sha == %s",
1189 op->json_key, ps->ea_s, ps->ea_s);
685f4dfe 1190
e93b43d6
JP
1191 if (ps->n_ipv4_addrs) {
1192 ds_put_cstr(&match, " && arp.spa == {");
f95523c0 1193 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
7d9d86ad
NS
1194 /* When the netmask is applied, if the host portion is
1195 * non-zero, the host can only use the specified
1196 * address in the arp.spa. If zero, the host is allowed
1197 * to use any address in the subnet. */
f95523c0
JP
1198 if (ps->ipv4_addrs[j].plen == 32
1199 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
1200 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
7d9d86ad 1201 } else {
e93b43d6 1202 ds_put_format(&match, "%s/%d",
f95523c0
JP
1203 ps->ipv4_addrs[j].network_s,
1204 ps->ipv4_addrs[j].plen);
7d9d86ad 1205 }
e93b43d6 1206 ds_put_cstr(&match, ", ");
685f4dfe
NS
1207 }
1208 ds_chomp(&match, ' ');
e93b43d6
JP
1209 ds_chomp(&match, ',');
1210 ds_put_cstr(&match, "}");
685f4dfe
NS
1211 }
1212 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1213 ds_cstr(&match), "next;");
685f4dfe
NS
1214 }
1215
e93b43d6
JP
1216 if (ps->n_ipv6_addrs || no_ip) {
1217 ds_clear(&match);
1218 ds_put_format(&match, "inport == %s && eth.src == %s",
1219 op->json_key, ps->ea_s);
1220 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
1221 ps->n_ipv6_addrs);
685f4dfe
NS
1222 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1223 ds_cstr(&match), "next;");
685f4dfe 1224 }
685f4dfe
NS
1225 }
1226
e93b43d6
JP
1227 ds_clear(&match);
1228 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
685f4dfe 1229 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
e93b43d6
JP
1230 ds_cstr(&match), "drop;");
1231 ds_destroy(&match);
685f4dfe
NS
1232}
1233
1234/**
1235 * Build port security constraints on IPv4 and IPv6 src and dst fields
1236 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1237 *
1238 * For each port security of the logical port, following
1239 * logical flows are added
1240 * - If the port security has IPv4 addresses,
1241 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1242 *
1243 * - If the port security has IPv6 addresses,
1244 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1245 *
1246 * - If the port security has IPv4 addresses or IPv6 addresses or both
1247 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
1248 */
1249static void
1250build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
1251 struct hmap *lflows)
1252{
1253 char *port_direction;
1254 enum ovn_stage stage;
1255 if (pipeline == P_IN) {
1256 port_direction = "inport";
1257 stage = S_SWITCH_IN_PORT_SEC_IP;
1258 } else {
1259 port_direction = "outport";
1260 stage = S_SWITCH_OUT_PORT_SEC_IP;
1261 }
1262
e93b43d6
JP
1263 for (size_t i = 0; i < op->n_ps_addrs; i++) {
1264 struct lport_addresses *ps = &op->ps_addrs[i];
685f4dfe 1265
e93b43d6 1266 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
685f4dfe
NS
1267 continue;
1268 }
1269
e93b43d6 1270 if (ps->n_ipv4_addrs) {
685f4dfe
NS
1271 struct ds match = DS_EMPTY_INITIALIZER;
1272 if (pipeline == P_IN) {
9e687b23
DL
1273 /* Permit use of the unspecified address for DHCP discovery */
1274 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
1275 ds_put_format(&dhcp_match, "inport == %s"
e93b43d6 1276 " && eth.src == %s"
9e687b23
DL
1277 " && ip4.src == 0.0.0.0"
1278 " && ip4.dst == 255.255.255.255"
e93b43d6
JP
1279 " && udp.src == 68 && udp.dst == 67",
1280 op->json_key, ps->ea_s);
9e687b23
DL
1281 ovn_lflow_add(lflows, op->od, stage, 90,
1282 ds_cstr(&dhcp_match), "next;");
1283 ds_destroy(&dhcp_match);
e93b43d6 1284 ds_put_format(&match, "inport == %s && eth.src == %s"
9e687b23 1285 " && ip4.src == {", op->json_key,
e93b43d6 1286 ps->ea_s);
685f4dfe 1287 } else {
e93b43d6 1288 ds_put_format(&match, "outport == %s && eth.dst == %s"
685f4dfe 1289 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
e93b43d6 1290 op->json_key, ps->ea_s);
685f4dfe
NS
1291 }
1292
f95523c0
JP
1293 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
1294 ovs_be32 mask = ps->ipv4_addrs[j].mask;
7d9d86ad
NS
1295 /* When the netmask is applied, if the host portion is
1296 * non-zero, the host can only use the specified
1297 * address. If zero, the host is allowed to use any
1298 * address in the subnet.
e93b43d6 1299 */
f95523c0
JP
1300 if (ps->ipv4_addrs[j].plen == 32
1301 || ps->ipv4_addrs[j].addr & ~mask) {
1302 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
1303 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
e93b43d6
JP
1304 /* Host is also allowed to receive packets to the
1305 * broadcast address in the specified subnet. */
1306 ds_put_format(&match, ", %s",
f95523c0 1307 ps->ipv4_addrs[j].bcast_s);
7d9d86ad
NS
1308 }
1309 } else {
1310 /* host portion is zero */
f95523c0
JP
1311 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
1312 ps->ipv4_addrs[j].plen);
7d9d86ad
NS
1313 }
1314 ds_put_cstr(&match, ", ");
685f4dfe
NS
1315 }
1316
1317 /* Replace ", " by "}". */
1318 ds_chomp(&match, ' ');
1319 ds_chomp(&match, ',');
1320 ds_put_cstr(&match, "}");
1321 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
1322 ds_destroy(&match);
685f4dfe
NS
1323 }
1324
e93b43d6 1325 if (ps->n_ipv6_addrs) {
685f4dfe 1326 struct ds match = DS_EMPTY_INITIALIZER;
9e687b23
DL
1327 if (pipeline == P_IN) {
1328 /* Permit use of unspecified address for duplicate address
1329 * detection */
1330 struct ds dad_match = DS_EMPTY_INITIALIZER;
1331 ds_put_format(&dad_match, "inport == %s"
e93b43d6 1332 " && eth.src == %s"
9e687b23
DL
1333 " && ip6.src == ::"
1334 " && ip6.dst == ff02::/16"
1335 " && icmp6.type == {131, 135, 143}", op->json_key,
e93b43d6 1336 ps->ea_s);
9e687b23
DL
1337 ovn_lflow_add(lflows, op->od, stage, 90,
1338 ds_cstr(&dad_match), "next;");
1339 ds_destroy(&dad_match);
1340 }
e93b43d6 1341 ds_put_format(&match, "%s == %s && %s == %s",
685f4dfe 1342 port_direction, op->json_key,
e93b43d6
JP
1343 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
1344 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
1345 ps->ipv6_addrs, ps->n_ipv6_addrs);
685f4dfe
NS
1346 ovn_lflow_add(lflows, op->od, stage, 90,
1347 ds_cstr(&match), "next;");
1348 ds_destroy(&match);
685f4dfe
NS
1349 }
1350
e93b43d6
JP
1351 char *match = xasprintf("%s == %s && %s == %s && ip",
1352 port_direction, op->json_key,
1353 pipeline == P_IN ? "eth.src" : "eth.dst",
1354 ps->ea_s);
685f4dfe
NS
1355 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
1356 free(match);
1357 }
f2a715b5 1358
685f4dfe
NS
1359}
1360
95a9a275 1361static bool
80f408f4 1362lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 1363{
80f408f4 1364 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
1365}
1366
4c7bf534 1367static bool
80f408f4 1368lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 1369{
80f408f4 1370 return !lsp->up || *lsp->up;
4c7bf534
NS
1371}
1372
78aab811
JP
1373static bool
1374has_stateful_acl(struct ovn_datapath *od)
1375{
9975d7be
BP
1376 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1377 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
1378 if (!strcmp(acl->action, "allow-related")) {
1379 return true;
1380 }
1381 }
1382
1383 return false;
1384}
1385
1386static void
2d018f9b
GS
1387build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
1388 struct hmap *ports)
78aab811
JP
1389{
1390 bool has_stateful = has_stateful_acl(od);
48fcdb47 1391 struct ovn_port *op;
78aab811
JP
1392
1393 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1394 * allowed by default. */
880fcd14
BP
1395 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1396 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
78aab811 1397
78aab811
JP
1398 /* If there are any stateful ACL rules in this dapapath, we must
1399 * send all IP packets through the conntrack action, which handles
1400 * defragmentation, in order to match L4 headers. */
1401 if (has_stateful) {
48fcdb47 1402 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 1403 if (op->od == od && !strcmp(op->nbsp->type, "router")) {
501f95e1
JP
1404 /* Can't use ct() for router ports. Consider the
1405 * following configuration: lp1(10.0.0.2) on
1406 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
1407 * ping from lp1 to lp2, First, the response will go
1408 * through ct() with a zone for lp2 in the ls2 ingress
1409 * pipeline on hostB. That ct zone knows about this
1410 * connection. Next, it goes through ct() with the zone
1411 * for the router port in the egress pipeline of ls2 on
1412 * hostB. This zone does not know about the connection,
1413 * as the icmp request went through the logical router
1414 * on hostA, not hostB. This would only work with
1415 * distributed conntrack state across all chassis. */
1416 struct ds match_in = DS_EMPTY_INITIALIZER;
1417 struct ds match_out = DS_EMPTY_INITIALIZER;
1418
48fcdb47
WL
1419 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1420 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
501f95e1
JP
1421 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
1422 ds_cstr(&match_in), "next;");
1423 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
1424 ds_cstr(&match_out), "next;");
48fcdb47
WL
1425
1426 ds_destroy(&match_in);
1427 ds_destroy(&match_out);
1428 }
1429 }
2d018f9b
GS
1430 /* Ingress and Egress Pre-ACL Table (Priority 110).
1431 *
1432 * Not to do conntrack on ND packets. */
1433 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
1434 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
48fcdb47 1435
78aab811
JP
1436 /* Ingress and Egress Pre-ACL Table (Priority 100).
1437 *
1438 * Regardless of whether the ACL is "from-lport" or "to-lport",
1439 * we need rules in both the ingress and egress table, because
facf8652
GS
1440 * the return traffic needs to be followed.
1441 *
1442 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
1443 * it to conntrack for tracking and defragmentation. */
1444 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
1445 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1446 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
1447 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2d018f9b
GS
1448 }
1449}
78aab811 1450
7a15be69
GS
1451/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
1452 * 'ip_address'. The caller must free() the memory allocated for
1453 * 'ip_address'. */
1454static void
1455ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1456 uint16_t *port)
1457{
1458 char *ip_str, *start, *next;
1459 *ip_address = NULL;
1460 *port = 0;
1461
1462 next = start = xstrdup(key);
1463 ip_str = strsep(&next, ":");
1464 if (!ip_str || !ip_str[0]) {
1465 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1466 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1467 free(start);
1468 return;
1469 }
1470
1471 ovs_be32 ip, mask;
1472 char *error = ip_parse_masked(ip_str, &ip, &mask);
1473 if (error || mask != OVS_BE32_MAX) {
1474 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1475 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1476 free(start);
1477 free(error);
1478 return;
1479 }
1480
1481 int l4_port = 0;
1482 if (next && next[0]) {
1483 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
1484 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1485 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
1486 free(start);
1487 return;
1488 }
1489 }
1490
1491 *port = l4_port;
1492 *ip_address = strdup(ip_str);
1493 free(start);
1494}
1495
1496static void
1497build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
1498{
1499 /* Allow all packets to go to next tables by default. */
1500 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
1501 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
1502
1503 struct sset all_ips = SSET_INITIALIZER(&all_ips);
1504 if (od->nbs->load_balancer) {
1505 struct nbrec_load_balancer *lb = od->nbs->load_balancer;
1506 struct smap *vips = &lb->vips;
1507 struct smap_node *node;
1508 bool vip_configured = false;
1509
1510 SMAP_FOR_EACH (node, vips) {
1511 vip_configured = true;
1512
1513 /* node->key contains IP:port or just IP. */
1514 char *ip_address = NULL;
1515 uint16_t port;
1516 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1517 if (!ip_address) {
1518 continue;
1519 }
1520
1521 if (!sset_contains(&all_ips, ip_address)) {
1522 sset_add(&all_ips, ip_address);
1523 }
1524
1525 free(ip_address);
1526
1527 /* Ignore L4 port information in the key because fragmented packets
1528 * may not have L4 information. The pre-stateful table will send
1529 * the packet through ct() action to de-fragment. In stateful
1530 * table, we will eventually look at L4 information. */
1531 }
1532
1533 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
1534 * packet to conntrack for defragmentation. */
1535 const char *ip_address;
1536 SSET_FOR_EACH(ip_address, &all_ips) {
1537 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
1538 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
1539 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1540 free(match);
1541 }
1542
1543 sset_destroy(&all_ips);
1544
1545 if (vip_configured) {
1546 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
1547 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1548 }
1549 }
1550}
1551
facf8652
GS
1552static void
1553build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
1554{
1555 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
1556 * allowed by default. */
1557 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
1558 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
1559
1560 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
1561 * sent to conntrack for tracking and defragmentation. */
1562 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
1563 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1564 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
1565 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1566}
1567
2d018f9b
GS
1568static void
1569build_acls(struct ovn_datapath *od, struct hmap *lflows)
1570{
1571 bool has_stateful = has_stateful_acl(od);
e75451fe 1572
2d018f9b
GS
1573 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1574 * default. A related rule at priority 1 is added below if there
1575 * are any stateful ACLs in this datapath. */
1576 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1577 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1578
1579 if (has_stateful) {
78aab811
JP
1580 /* Ingress and Egress ACL Table (Priority 1).
1581 *
1582 * By default, traffic is allowed. This is partially handled by
1583 * the Priority 0 ACL flows added earlier, but we also need to
1584 * commit IP flows. This is because, while the initiater's
1585 * direction may not have any stateful rules, the server's may
1586 * and then its return traffic would not have an associated
cc58e1f2
RB
1587 * conntrack entry and would return "+invalid".
1588 *
1589 * We use "ct_commit" for a connection that is not already known
1590 * by the connection tracker. Once a connection is committed,
1591 * subsequent packets will hit the flow at priority 0 that just
1592 * uses "next;"
1593 *
1594 * We also check for established connections that have ct_label[0]
1595 * set on them. That's a connection that was disallowed, but is
1596 * now allowed by policy again since it hit this default-allow flow.
1597 * We need to set ct_label[0]=0 to let the connection continue,
1598 * which will be done by ct_commit() in the "stateful" stage.
1599 * Subsequent packets will hit the flow at priority 0 that just
1600 * uses "next;". */
1601 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
1602 "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
1603 REGBIT_CONNTRACK_COMMIT" = 1; next;");
1604 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
1605 "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
1606 REGBIT_CONNTRACK_COMMIT" = 1; next;");
78aab811
JP
1607
1608 /* Ingress and Egress ACL Table (Priority 65535).
1609 *
cc58e1f2
RB
1610 * Always drop traffic that's in an invalid state. Also drop
1611 * reply direction packets for connections that have been marked
1612 * for deletion (bit 0 of ct_label is set).
1613 *
1614 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 1615 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
1616 "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
1617 "drop;");
880fcd14 1618 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
1619 "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
1620 "drop;");
78aab811
JP
1621
1622 /* Ingress and Egress ACL Table (Priority 65535).
1623 *
cc58e1f2
RB
1624 * Allow reply traffic that is part of an established
1625 * conntrack entry that has not been marked for deletion
1626 * (bit 0 of ct_label). We only match traffic in the
1627 * reply direction because we want traffic in the request
1628 * direction to hit the currently defined policy from ACLs.
1629 *
1630 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 1631 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
1632 "ct.est && !ct.rel && !ct.new && !ct.inv "
1633 "&& ct.rpl && ct_label[0] == 0",
78aab811 1634 "next;");
880fcd14 1635 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
1636 "ct.est && !ct.rel && !ct.new && !ct.inv "
1637 "&& ct.rpl && ct_label[0] == 0",
78aab811
JP
1638 "next;");
1639
1640 /* Ingress and Egress ACL Table (Priority 65535).
1641 *
cc58e1f2
RB
1642 * Allow traffic that is related to an existing conntrack entry that
1643 * has not been marked for deletion (bit 0 of ct_label).
1644 *
1645 * This is enforced at a higher priority than ACLs can be defined.
78aab811
JP
1646 *
1647 * NOTE: This does not support related data sessions (eg,
1648 * a dynamically negotiated FTP data channel), but will allow
1649 * related traffic such as an ICMP Port Unreachable through
1650 * that's generated from a non-listening UDP port. */
880fcd14 1651 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
1652 "!ct.est && ct.rel && !ct.new && !ct.inv "
1653 "&& ct_label[0] == 0",
78aab811 1654 "next;");
880fcd14 1655 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
1656 "!ct.est && ct.rel && !ct.new && !ct.inv "
1657 "&& ct_label[0] == 0",
78aab811 1658 "next;");
e75451fe
ZKL
1659
1660 /* Ingress and Egress ACL Table (Priority 65535).
1661 *
1662 * Not to do conntrack on ND packets. */
1663 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
1664 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
78aab811
JP
1665 }
1666
1667 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
1668 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1669 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 1670 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 1671 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811 1672
cc58e1f2
RB
1673 if (!strcmp(acl->action, "allow")
1674 || !strcmp(acl->action, "allow-related")) {
78aab811
JP
1675 /* If there are any stateful flows, we must even commit "allow"
1676 * actions. This is because, while the initiater's
1677 * direction may not have any stateful rules, the server's
1678 * may and then its return traffic would not have an
1679 * associated conntrack entry and would return "+invalid". */
cc58e1f2
RB
1680 if (!has_stateful) {
1681 ovn_lflow_add(lflows, od, stage,
1682 acl->priority + OVN_ACL_PRI_OFFSET,
1683 acl->match, "next;");
1684 } else {
1685 struct ds match = DS_EMPTY_INITIALIZER;
1686
1687 /* Commit the connection tracking entry if it's a new
1688 * connection that matches this ACL. After this commit,
1689 * the reply traffic is allowed by a flow we create at
1690 * priority 65535, defined earlier.
1691 *
1692 * It's also possible that a known connection was marked for
1693 * deletion after a policy was deleted, but the policy was
1694 * re-added while that connection is still known. We catch
1695 * that case here and un-set ct_label[0] (which will be done
1696 * by ct_commit in the "stateful" stage) to indicate that the
1697 * connection should be allowed to resume.
1698 */
1699 ds_put_format(&match, "((ct.new && !ct.est)"
1700 " || (!ct.new && ct.est && !ct.rpl "
1701 "&& ct_label[0] == 1)) "
1702 "&& (%s)", acl->match);
1703 ovn_lflow_add(lflows, od, stage,
1704 acl->priority + OVN_ACL_PRI_OFFSET,
1705 ds_cstr(&match),
1706 REGBIT_CONNTRACK_COMMIT" = 1; next;");
1707
1708 /* Match on traffic in the request direction for an established
1709 * connection tracking entry that has not been marked for
1710 * deletion. There is no need to commit here, so we can just
1711 * proceed to the next table. We use this to ensure that this
1712 * connection is still allowed by the currently defined
1713 * policy. */
1714 ds_clear(&match);
1715 ds_put_format(&match,
1716 "!ct.new && ct.est && !ct.rpl"
1717 " && ct_label[0] == 0 && (%s)",
1718 acl->match);
1719 ovn_lflow_add(lflows, od, stage,
1720 acl->priority + OVN_ACL_PRI_OFFSET,
1721 ds_cstr(&match), "next;");
1722
1723 ds_destroy(&match);
1724 }
1725 } else if (!strcmp(acl->action, "drop")
1726 || !strcmp(acl->action, "reject")) {
78aab811
JP
1727 struct ds match = DS_EMPTY_INITIALIZER;
1728
cc58e1f2
RB
1729 /* XXX Need to support "reject", treat it as "drop;" for now. */
1730 if (!strcmp(acl->action, "reject")) {
1731 VLOG_INFO("reject is not a supported action");
1732 }
78aab811 1733
cc58e1f2
RB
1734 /* The implementation of "drop" differs if stateful ACLs are in
1735 * use for this datapath. In that case, the actions differ
1736 * depending on whether the connection was previously committed
1737 * to the connection tracker with ct_commit. */
1738 if (has_stateful) {
1739 /* If the packet is not part of an established connection, then
1740 * we can simply drop it. */
1741 ds_put_format(&match,
1742 "(!ct.est || (ct.est && ct_label[0] == 1)) "
1743 "&& (%s)",
1744 acl->match);
1745 ovn_lflow_add(lflows, od, stage, acl->priority +
1746 OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");
1747
1748 /* For an existing connection without ct_label set, we've
1749 * encountered a policy change. ACLs previously allowed
1750 * this connection and we committed the connection tracking
1751 * entry. Current policy says that we should drop this
1752 * connection. First, we set bit 0 of ct_label to indicate
1753 * that this connection is set for deletion. By not
1754 * specifying "next;", we implicitly drop the packet after
1755 * updating conntrack state. We would normally defer
1756 * ct_commit() to the "stateful" stage, but since we're
1757 * dropping the packet, we go ahead and do it here. */
1758 ds_clear(&match);
1759 ds_put_format(&match,
1760 "ct.est && ct_label[0] == 0 && (%s)",
1761 acl->match);
1762 ovn_lflow_add(lflows, od, stage,
1763 acl->priority + OVN_ACL_PRI_OFFSET,
1764 ds_cstr(&match), "ct_commit(ct_label=1/1);");
1765
1766 ds_destroy(&match);
1767 } else {
1768 /* There are no stateful ACLs in use on this datapath,
1769 * so a "drop" ACL is simply the "drop" logical flow action
1770 * in all cases. */
1771 ovn_lflow_add(lflows, od, stage,
1772 acl->priority + OVN_ACL_PRI_OFFSET,
1773 acl->match, "drop;");
1774 }
78aab811
JP
1775 }
1776 }
1777}
1778
7a15be69
GS
1779static void
1780build_lb(struct ovn_datapath *od, struct hmap *lflows)
1781{
1782 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
1783 * default. */
1784 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
1785 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
1786
1787 if (od->nbs->load_balancer) {
1788 /* Ingress and Egress LB Table (Priority 65535).
1789 *
1790 * Send established traffic through conntrack for just NAT. */
1791 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
1792 "ct.est && !ct.rel && !ct.new && !ct.inv",
1793 REGBIT_CONNTRACK_NAT" = 1; next;");
1794 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
1795 "ct.est && !ct.rel && !ct.new && !ct.inv",
1796 REGBIT_CONNTRACK_NAT" = 1; next;");
1797 }
1798}
1799
fa313a8c
GS
1800static void
1801build_stateful(struct ovn_datapath *od, struct hmap *lflows)
1802{
1803 /* Ingress and Egress stateful Table (Priority 0): Packets are
1804 * allowed by default. */
1805 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
1806 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
1807
1808 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
cc58e1f2
RB
1809 * committed to conntrack. We always set ct_label[0] to 0 here as
1810 * any packet that makes it this far is part of a connection we
1811 * want to allow to continue. */
fa313a8c 1812 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
cc58e1f2 1813 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
fa313a8c 1814 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
cc58e1f2 1815 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
7a15be69
GS
1816
1817 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
1818 * through nat (without committing).
1819 *
1820 * REGBIT_CONNTRACK_COMMIT is set for new connections and
1821 * REGBIT_CONNTRACK_NAT is set for established connections. So they
1822 * don't overlap.
1823 */
1824 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
1825 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
1826 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
1827 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
1828
1829 /* Load balancing rules for new connections get committed to conntrack
1830 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
1831 * a higher priority rule for load balancing below also commits the
1832 * connection, so it is okay if we do not hit the above match on
1833 * REGBIT_CONNTRACK_COMMIT. */
1834 if (od->nbs->load_balancer) {
1835 struct nbrec_load_balancer *lb = od->nbs->load_balancer;
1836 struct smap *vips = &lb->vips;
1837 struct smap_node *node;
1838
1839 SMAP_FOR_EACH (node, vips) {
1840 uint16_t port = 0;
1841
1842 /* node->key contains IP:port or just IP. */
1843 char *ip_address = NULL;
1844 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1845 if (!ip_address) {
1846 continue;
1847 }
1848
1849 /* New connections in Ingress table. */
1850 char *action = xasprintf("ct_lb(%s);", node->value);
1851 struct ds match = DS_EMPTY_INITIALIZER;
1852 ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
1853 if (port) {
1854 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
1855 ds_put_format(&match, "&& udp && udp.dst == %d", port);
1856 } else {
1857 ds_put_format(&match, "&& tcp && tcp.dst == %d", port);
1858 }
1859 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
1860 120, ds_cstr(&match), action);
1861 } else {
1862 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
1863 110, ds_cstr(&match), action);
1864 }
1865
1866 ds_destroy(&match);
1867 free(action);
1868 }
1869 }
fa313a8c
GS
1870}
1871
bd39395f 1872static void
9975d7be
BP
1873build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1874 struct hmap *lflows, struct hmap *mcgroups)
bd39395f 1875{
5cff6b99
BP
1876 /* This flow table structure is documented in ovn-northd(8), so please
1877 * update ovn-northd.8.xml if you change anything. */
1878
09b39248
JP
1879 struct ds match = DS_EMPTY_INITIALIZER;
1880 struct ds actions = DS_EMPTY_INITIALIZER;
1881
9975d7be 1882 /* Build pre-ACL and ACL tables for both ingress and egress.
685f4dfe 1883 * Ingress tables 3 and 4. Egress tables 0 and 1. */
5868eb24
BP
1884 struct ovn_datapath *od;
1885 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1886 if (!od->nbs) {
1887 continue;
1888 }
1889
2d018f9b 1890 build_pre_acls(od, lflows, ports);
7a15be69 1891 build_pre_lb(od, lflows);
facf8652 1892 build_pre_stateful(od, lflows);
2d018f9b 1893 build_acls(od, lflows);
7a15be69 1894 build_lb(od, lflows);
fa313a8c 1895 build_stateful(od, lflows);
9975d7be
BP
1896 }
1897
1898 /* Logical switch ingress table 0: Admission control framework (priority
1899 * 100). */
1900 HMAP_FOR_EACH (od, key_node, datapaths) {
1901 if (!od->nbs) {
1902 continue;
1903 }
1904
bd39395f 1905 /* Logical VLANs not supported. */
685f4dfe 1906 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
091e3af9 1907 "drop;");
bd39395f
BP
1908
1909 /* Broadcast/multicast source address is invalid. */
685f4dfe 1910 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
091e3af9 1911 "drop;");
bd39395f 1912
35060cdc
BP
1913 /* Port security flows have priority 50 (see below) and will continue
1914 * to the next table if packet source is acceptable. */
bd39395f
BP
1915 }
1916
685f4dfe
NS
1917 /* Logical switch ingress table 0: Ingress port security - L2
1918 * (priority 50).
1919 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
1920 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
1921 */
5868eb24
BP
1922 struct ovn_port *op;
1923 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 1924 if (!op->nbsp) {
9975d7be
BP
1925 continue;
1926 }
1927
0ee00741 1928 if (!lsp_is_enabled(op->nbsp)) {
96af668a
BP
1929 /* Drop packets from disabled logical ports (since logical flow
1930 * tables are default-drop). */
1931 continue;
1932 }
1933
09b39248 1934 ds_clear(&match);
9975d7be 1935 ds_put_format(&match, "inport == %s", op->json_key);
e93b43d6
JP
1936 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
1937 &match);
685f4dfe 1938 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
96af668a 1939 ds_cstr(&match), "next;");
685f4dfe 1940
0ee00741 1941 if (op->nbsp->n_port_security) {
685f4dfe
NS
1942 build_port_security_ip(P_IN, op, lflows);
1943 build_port_security_nd(op, lflows);
1944 }
1945 }
1946
1947 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
1948 * (priority 0)*/
1949 HMAP_FOR_EACH (od, key_node, datapaths) {
1950 if (!od->nbs) {
1951 continue;
1952 }
1953
1954 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
1955 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
5868eb24 1956 }
445a266a 1957
94300e09 1958 /* Ingress table 9: ARP responder, skip requests coming from localnet ports.
fa128126
HZ
1959 * (priority 100). */
1960 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 1961 if (!op->nbsp) {
fa128126
HZ
1962 continue;
1963 }
1964
0ee00741 1965 if (!strcmp(op->nbsp->type, "localnet")) {
09b39248
JP
1966 ds_clear(&match);
1967 ds_put_format(&match, "inport == %s", op->json_key);
e75451fe 1968 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
09b39248 1969 ds_cstr(&match), "next;");
fa128126
HZ
1970 }
1971 }
1972
94300e09 1973 /* Ingress table 9: ARP/ND responder, reply for known IPs.
fa128126 1974 * (priority 50). */
57d143eb 1975 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 1976 if (!op->nbsp) {
57d143eb
HZ
1977 continue;
1978 }
1979
4c7bf534 1980 /*
e75451fe 1981 * Add ARP/ND reply flows if either the
4c7bf534
NS
1982 * - port is up or
1983 * - port type is router
1984 */
0ee00741 1985 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
4c7bf534
NS
1986 continue;
1987 }
1988
e93b43d6
JP
1989 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
1990 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
09b39248 1991 ds_clear(&match);
e93b43d6
JP
1992 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
1993 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
09b39248
JP
1994 ds_clear(&actions);
1995 ds_put_format(&actions,
57d143eb 1996 "eth.dst = eth.src; "
e93b43d6 1997 "eth.src = %s; "
57d143eb
HZ
1998 "arp.op = 2; /* ARP reply */ "
1999 "arp.tha = arp.sha; "
e93b43d6 2000 "arp.sha = %s; "
57d143eb 2001 "arp.tpa = arp.spa; "
e93b43d6 2002 "arp.spa = %s; "
57d143eb
HZ
2003 "outport = inport; "
2004 "inport = \"\"; /* Allow sending out inport. */ "
2005 "output;",
e93b43d6
JP
2006 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
2007 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
e75451fe 2008 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 2009 ds_cstr(&match), ds_cstr(&actions));
57d143eb 2010 }
7dc88496 2011
e93b43d6 2012 if (op->lsp_addrs[i].n_ipv6_addrs > 0) {
09b39248 2013 ds_clear(&match);
e75451fe 2014 ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
e93b43d6
JP
2015 if (op->lsp_addrs[i].n_ipv6_addrs == 1) {
2016 ds_put_format(&match, "nd.target == %s",
2017 op->lsp_addrs[i].ipv6_addrs[0].addr_s);
e75451fe 2018 } else {
e93b43d6
JP
2019 ds_put_format(&match, "nd.target == {");
2020 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
2021 ds_put_cstr(&match,
2022 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
e75451fe
ZKL
2023 }
2024 ds_chomp(&match, ' ');
e93b43d6
JP
2025 ds_chomp(&match, ',');
2026 ds_put_cstr(&match, "}");
e75451fe 2027 }
09b39248
JP
2028 ds_clear(&actions);
2029 ds_put_format(&actions,
e93b43d6
JP
2030 "na { eth.src = %s; "
2031 "nd.tll = %s; "
e75451fe
ZKL
2032 "outport = inport; "
2033 "inport = \"\"; /* Allow sending out inport. */ "
2034 "output; };",
e93b43d6
JP
2035 op->lsp_addrs[i].ea_s,
2036 op->lsp_addrs[i].ea_s);
e75451fe
ZKL
2037
2038 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 2039 ds_cstr(&match), ds_cstr(&actions));
e75451fe 2040
e75451fe 2041 }
57d143eb
HZ
2042 }
2043 }
2044
94300e09 2045 /* Ingress table 9: ARP/ND responder, by default goto next.
fa128126
HZ
2046 * (priority 0)*/
2047 HMAP_FOR_EACH (od, key_node, datapaths) {
2048 if (!od->nbs) {
2049 continue;
2050 }
2051
e75451fe 2052 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
fa128126
HZ
2053 }
2054
94300e09 2055 /* Ingress table 10: Destination lookup, broadcast and multicast handling
5868eb24
BP
2056 * (priority 100). */
2057 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2058 if (!op->nbsp) {
9975d7be
BP
2059 continue;
2060 }
2061
0ee00741 2062 if (lsp_is_enabled(op->nbsp)) {
9975d7be 2063 ovn_multicast_add(mcgroups, &mc_flood, op);
445a266a 2064 }
5868eb24
BP
2065 }
2066 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2067 if (!od->nbs) {
2068 continue;
2069 }
2070
2071 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
5868eb24 2072 "outport = \""MC_FLOOD"\"; output;");
bd39395f 2073 }
bd39395f 2074
94300e09 2075 /* Ingress table 10: Destination lookup, unicast handling (priority 50), */
5868eb24 2076 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2077 if (!op->nbsp) {
9975d7be
BP
2078 continue;
2079 }
2080
0ee00741 2081 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
74ff3298 2082 struct eth_addr mac;
5868eb24 2083
0ee00741 2084 if (eth_addr_from_string(op->nbsp->addresses[i], &mac)) {
09b39248 2085 ds_clear(&match);
9975d7be
BP
2086 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
2087 ETH_ADDR_ARGS(mac));
5868eb24 2088
09b39248 2089 ds_clear(&actions);
9975d7be
BP
2090 ds_put_format(&actions, "outport = %s; output;", op->json_key);
2091 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
5868eb24 2092 ds_cstr(&match), ds_cstr(&actions));
0ee00741
HK
2093 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
2094 if (lsp_is_enabled(op->nbsp)) {
9975d7be 2095 ovn_multicast_add(mcgroups, &mc_unknown, op);
96af668a
BP
2096 op->od->has_unknown = true;
2097 }
5868eb24
BP
2098 } else {
2099 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
445a266a 2100
2fa326a3
BP
2101 VLOG_INFO_RL(&rl,
2102 "%s: invalid syntax '%s' in addresses column",
0ee00741 2103 op->nbsp->name, op->nbsp->addresses[i]);
445a266a
BP
2104 }
2105 }
bd39395f
BP
2106 }
2107
94300e09 2108 /* Ingress table 10: Destination lookup for unknown MACs (priority 0). */
5868eb24 2109 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2110 if (!od->nbs) {
2111 continue;
2112 }
2113
5868eb24 2114 if (od->has_unknown) {
9975d7be 2115 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
5868eb24 2116 "outport = \""MC_UNKNOWN"\"; output;");
445a266a 2117 }
bd39395f
BP
2118 }
2119
94300e09
JP
2120 /* Egress tables 6: Egress port security - IP (priority 0)
2121 * Egress table 7: Egress port security L2 - multicast/broadcast
2122 * (priority 100). */
5868eb24 2123 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2124 if (!od->nbs) {
2125 continue;
2126 }
2127
685f4dfe
NS
2128 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
2129 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
091e3af9 2130 "output;");
48f42f3a
RB
2131 }
2132
94300e09 2133 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
685f4dfe
NS
2134 * if port security enabled.
2135 *
94300e09 2136 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
d770a830
BP
2137 *
2138 * Priority 50 rules implement port security for enabled logical port.
2139 *
2140 * Priority 150 rules drop packets to disabled logical ports, so that they
2141 * don't even receive multicast or broadcast packets. */
5868eb24 2142 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2143 if (!op->nbsp) {
9975d7be
BP
2144 continue;
2145 }
2146
09b39248 2147 ds_clear(&match);
9975d7be 2148 ds_put_format(&match, "outport == %s", op->json_key);
0ee00741 2149 if (lsp_is_enabled(op->nbsp)) {
e93b43d6
JP
2150 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
2151 &match);
685f4dfe 2152 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
d770a830
BP
2153 ds_cstr(&match), "output;");
2154 } else {
685f4dfe 2155 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
d770a830
BP
2156 ds_cstr(&match), "drop;");
2157 }
eb00399e 2158
0ee00741 2159 if (op->nbsp->n_port_security) {
685f4dfe
NS
2160 build_port_security_ip(P_OUT, op, lflows);
2161 }
eb00399e 2162 }
09b39248
JP
2163
2164 ds_destroy(&match);
2165 ds_destroy(&actions);
9975d7be 2166}
eb00399e 2167
9975d7be
BP
2168static bool
2169lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
2170{
2171 return !lrport->enabled || *lrport->enabled;
2172}
2173
4685e523
JP
2174/* Returns a string of the IP address of the router port 'op' that
2175 * overlaps with 'ip_s". If one is not found, returns NULL.
2176 *
2177 * The caller must not free the returned string. */
2178static const char *
2179find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
2180{
6026f534 2181 ovs_be32 ip;
4685e523
JP
2182
2183 if (!ip_parse(ip_s, &ip)) {
2184 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2185 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
2186 return NULL;
2187 }
2188
2189 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2190 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
2191
2192 if (!((na->network ^ ip) & na->mask)) {
2193 /* There should be only 1 interface that matches the
2194 * next hop. Otherwise, it's a configuration error,
2195 * because subnets of router's interfaces should NOT
2196 * overlap. */
2197 return na->addr_s;
2198 }
2199 }
2200
2201 return NULL;
2202}
2203
9975d7be 2204static void
0bac7164 2205add_route(struct hmap *lflows, const struct ovn_port *op,
4685e523
JP
2206 const char *lrp_addr_s, const char *network_s, int plen,
2207 const char *gateway)
9975d7be 2208{
c9bdf7bd 2209 char *match = xasprintf("ip4.dst == %s/%d", network_s, plen);
9975d7be
BP
2210
2211 struct ds actions = DS_EMPTY_INITIALIZER;
47f3b59b 2212 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
9975d7be 2213 if (gateway) {
c9bdf7bd 2214 ds_put_cstr(&actions, gateway);
9975d7be
BP
2215 } else {
2216 ds_put_cstr(&actions, "ip4.dst");
2217 }
4685e523 2218 ds_put_format(&actions, "; "
c9bdf7bd 2219 "reg1 = %s; "
4685e523 2220 "eth.src = %s; "
0bac7164 2221 "outport = %s; "
4685e523 2222 "inport = \"\"; /* Allow sending out inport. */ "
0bac7164 2223 "next;",
4685e523
JP
2224 lrp_addr_s,
2225 op->lrp_networks.ea_s,
2226 op->json_key);
9975d7be
BP
2227
2228 /* The priority here is calculated to implement longest-prefix-match
2229 * routing. */
c9bdf7bd
JP
2230 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, match,
2231 ds_cstr(&actions));
9975d7be
BP
2232 ds_destroy(&actions);
2233 free(match);
2234}
2235
28dc3fe9
SR
2236static void
2237build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
2238 struct hmap *ports,
2239 const struct nbrec_logical_router_static_route *route)
2240{
4685e523
JP
2241 ovs_be32 prefix, nexthop, mask;
2242 const char *lrp_addr_s;
28dc3fe9
SR
2243
2244 /* Verify that next hop is an IP address with 32 bits mask. */
4685e523 2245 char *error = ip_parse_masked(route->nexthop, &nexthop, &mask);
28dc3fe9
SR
2246 if (error || mask != OVS_BE32_MAX) {
2247 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2248 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
2249 free(error);
2250 return;
2251 }
2252
2253 /* Verify that ip prefix is a valid CIDR address. */
2254 error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
2255 if (error || !ip_is_cidr(mask)) {
2256 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
c9bdf7bd 2257 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
28dc3fe9
SR
2258 route->ip_prefix);
2259 free(error);
2260 return;
2261 }
2262
2263 /* Find the outgoing port. */
2264 struct ovn_port *out_port = NULL;
2265 if (route->output_port) {
2266 out_port = ovn_port_find(ports, route->output_port);
2267 if (!out_port) {
2268 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2269 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
2270 route->output_port, route->ip_prefix);
2271 return;
2272 }
4685e523 2273 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
28dc3fe9
SR
2274 } else {
2275 /* output_port is not specified, find the
2276 * router port matching the next hop. */
2277 int i;
2278 for (i = 0; i < od->nbr->n_ports; i++) {
2279 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
2280 out_port = ovn_port_find(ports, lrp->name);
2281 if (!out_port) {
2282 /* This should not happen. */
2283 continue;
2284 }
2285
4685e523
JP
2286 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
2287 if (lrp_addr_s) {
28dc3fe9
SR
2288 break;
2289 }
2290 }
28dc3fe9
SR
2291 }
2292
4685e523
JP
2293 if (!lrp_addr_s) {
2294 /* There is no matched out port. */
2295 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2296 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
2297 route->ip_prefix, route->nexthop);
2298 return;
2299 }
2300
2301 char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & mask));
2302 add_route(lflows, out_port, lrp_addr_s, prefix_s,
2303 ip_count_cidr_bits(mask), route->nexthop);
c9bdf7bd 2304 free(prefix_s);
28dc3fe9
SR
2305}
2306
4685e523
JP
2307static void
2308op_put_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
2309{
2310 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
2311 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
2312 return;
2313 }
2314
2315 ds_put_cstr(ds, "{");
2316 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2317 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
2318 if (add_bcast) {
2319 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
2320 }
2321 }
2322 ds_chomp(ds, ' ');
2323 ds_chomp(ds, ',');
2324 ds_put_cstr(ds, "}");
2325}
2326
9975d7be
BP
2327static void
2328build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
2329 struct hmap *lflows)
2330{
2331 /* This flow table structure is documented in ovn-northd(8), so please
2332 * update ovn-northd.8.xml if you change anything. */
2333
09b39248
JP
2334 struct ds match = DS_EMPTY_INITIALIZER;
2335 struct ds actions = DS_EMPTY_INITIALIZER;
2336
9975d7be
BP
2337 /* Logical router ingress table 0: Admission control framework. */
2338 struct ovn_datapath *od;
2339 HMAP_FOR_EACH (od, key_node, datapaths) {
2340 if (!od->nbr) {
2341 continue;
2342 }
2343
2344 /* Logical VLANs not supported.
2345 * Broadcast/multicast source address is invalid. */
2346 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
2347 "vlan.present || eth.src[40]", "drop;");
2348 }
2349
2350 /* Logical router ingress table 0: match (priority 50). */
2351 struct ovn_port *op;
2352 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2353 if (!op->nbrp) {
9975d7be
BP
2354 continue;
2355 }
2356
0ee00741 2357 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
2358 /* Drop packets from disabled logical ports (since logical flow
2359 * tables are default-drop). */
2360 continue;
2361 }
2362
09b39248 2363 ds_clear(&match);
4685e523
JP
2364 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
2365 op->lrp_networks.ea_s, op->json_key);
9975d7be 2366 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 2367 ds_cstr(&match), "next;");
9975d7be
BP
2368 }
2369
2370 /* Logical router ingress table 1: IP Input. */
78aab811 2371 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2372 if (!od->nbr) {
2373 continue;
2374 }
2375
2376 /* L3 admission control: drop multicast and broadcast source, localhost
2377 * source or destination, and zero network source or destination
2378 * (priority 100). */
2379 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
2380 "ip4.mcast || "
2381 "ip4.src == 255.255.255.255 || "
2382 "ip4.src == 127.0.0.0/8 || "
2383 "ip4.dst == 127.0.0.0/8 || "
2384 "ip4.src == 0.0.0.0/8 || "
2385 "ip4.dst == 0.0.0.0/8",
2386 "drop;");
2387
0bac7164
BP
2388 /* ARP reply handling. Use ARP replies to populate the logical
2389 * router's ARP table. */
2390 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
2391 "put_arp(inport, arp.spa, arp.sha);");
2392
9975d7be
BP
2393 /* Drop Ethernet local broadcast. By definition this traffic should
2394 * not be forwarded.*/
2395 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2396 "eth.bcast", "drop;");
2397
9975d7be
BP
2398 /* TTL discard.
2399 *
2400 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
2401 ds_clear(&match);
2402 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
2403 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
2404 ds_cstr(&match), "drop;");
9975d7be
BP
2405
2406 /* Pass other traffic not already handled to the next table for
2407 * routing. */
2408 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
2409 }
2410
9975d7be 2411 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2412 if (!op->nbrp) {
9975d7be
BP
2413 continue;
2414 }
2415
2416 /* L3 admission control: drop packets that originate from an IP address
2417 * owned by the router or a broadcast address known to the router
2418 * (priority 100). */
09b39248 2419 ds_clear(&match);
4685e523
JP
2420 ds_put_cstr(&match, "ip4.src == ");
2421 op_put_networks(&match, op, true);
9975d7be 2422 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
09b39248 2423 ds_cstr(&match), "drop;");
9975d7be 2424
dd7652e6 2425 /* ICMP echo reply. These flows reply to ICMP echo requests
bb3c4568
FF
2426 * received for the router's IP address. Since packets only
2427 * get here as part of the logical router datapath, the inport
2428 * (i.e. the incoming locally attached net) does not matter.
2429 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
09b39248 2430 ds_clear(&match);
4685e523
JP
2431 ds_put_cstr(&match, "ip4.dst == ");
2432 op_put_networks(&match, op, false);
2433 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
2434
09b39248
JP
2435 ds_clear(&actions);
2436 ds_put_format(&actions,
4685e523 2437 "ip4.dst <-> ip4.src; "
dd7652e6
JP
2438 "ip.ttl = 255; "
2439 "icmp4.type = 0; "
2440 "inport = \"\"; /* Allow sending out inport. */ "
4685e523 2441 "next; ");
dd7652e6 2442 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 2443 ds_cstr(&match), ds_cstr(&actions));
dd7652e6 2444
9975d7be
BP
2445 /* ARP reply. These flows reply to ARP requests for the router's own
2446 * IP address. */
4685e523
JP
2447 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2448 ds_clear(&match);
2449 ds_put_format(&match,
2450 "inport == %s && arp.tpa == %s && arp.op == 1",
2451 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
2452
2453 ds_clear(&actions);
2454 ds_put_format(&actions,
2455 "eth.dst = eth.src; "
2456 "eth.src = %s; "
2457 "arp.op = 2; /* ARP reply */ "
2458 "arp.tha = arp.sha; "
2459 "arp.sha = %s; "
2460 "arp.tpa = arp.spa; "
2461 "arp.spa = %s; "
2462 "outport = %s; "
2463 "inport = \"\"; /* Allow sending out inport. */ "
2464 "output;",
2465 op->lrp_networks.ea_s,
2466 op->lrp_networks.ea_s,
2467 op->lrp_networks.ipv4_addrs[i].addr_s,
2468 op->json_key);
2469 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2470 ds_cstr(&match), ds_cstr(&actions));
2471 }
9975d7be 2472
de297547
GS
2473 /* ARP handling for external IP addresses.
2474 *
2475 * DNAT IP addresses are external IP addresses that need ARP
2476 * handling. */
2477 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2478 const struct nbrec_nat *nat;
2479
2480 nat = op->od->nbr->nat[i];
2481
2482 if(!strcmp(nat->type, "snat")) {
2483 continue;
2484 }
2485
2486 ovs_be32 ip;
2487 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2488 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2489 VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration "
2490 "for router %s", nat->external_ip, op->key);
2491 continue;
2492 }
2493
09b39248
JP
2494 ds_clear(&match);
2495 ds_put_format(&match,
2496 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2497 op->json_key, IP_ARGS(ip));
4685e523 2498
09b39248
JP
2499 ds_clear(&actions);
2500 ds_put_format(&actions,
de297547 2501 "eth.dst = eth.src; "
4685e523 2502 "eth.src = %s; "
de297547
GS
2503 "arp.op = 2; /* ARP reply */ "
2504 "arp.tha = arp.sha; "
4685e523 2505 "arp.sha = %s; "
de297547
GS
2506 "arp.tpa = arp.spa; "
2507 "arp.spa = "IP_FMT"; "
2508 "outport = %s; "
2509 "inport = \"\"; /* Allow sending out inport. */ "
2510 "output;",
4685e523
JP
2511 op->lrp_networks.ea_s,
2512 op->lrp_networks.ea_s,
de297547
GS
2513 IP_ARGS(ip),
2514 op->json_key);
2515 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 2516 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
2517 }
2518
4ef48e9d
CSV
2519 /* Drop IP traffic to this router, unless the router ip is used as
2520 * SNAT ip. */
4685e523
JP
2521 ovs_be32 *nat_ips = xmalloc(sizeof *nat_ips * op->od->nbr->n_nat);
2522 size_t n_nat_ips = 0;
4ef48e9d
CSV
2523 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2524 const struct nbrec_nat *nat;
2525 ovs_be32 ip;
2526
2527 nat = op->od->nbr->nat[i];
2528 if (strcmp(nat->type, "snat")) {
2529 continue;
2530 }
2531
2532 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2533 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2534 VLOG_WARN_RL(&rl, "bad ip address %s in snat configuration "
2535 "for router %s", nat->external_ip, op->key);
2536 continue;
2537 }
2538
4685e523
JP
2539 nat_ips[n_nat_ips++] = ip;
2540 }
2541
2542 ds_clear(&match);
2543 ds_put_cstr(&match, "ip4.dst == {");
2544 bool has_drop_ips = false;
2545 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2546 for (int j = 0; j < n_nat_ips; j++) {
2547 if (op->lrp_networks.ipv4_addrs[i].addr == nat_ips[j]) {
2548 continue;
2549 }
4ef48e9d 2550 }
4685e523
JP
2551 ds_put_format(&match, "%s, ",
2552 op->lrp_networks.ipv4_addrs[i].addr_s);
2553 has_drop_ips = true;
4ef48e9d 2554 }
4685e523
JP
2555 ds_chomp(&match, ' ');
2556 ds_chomp(&match, ',');
2557 ds_put_cstr(&match, "}");
4ef48e9d 2558
4685e523
JP
2559 if (has_drop_ips) {
2560 /* Drop IP traffic to this router. */
09b39248
JP
2561 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
2562 ds_cstr(&match), "drop;");
4ef48e9d 2563 }
4685e523
JP
2564
2565 free(nat_ips);
9975d7be
BP
2566 }
2567
de297547
GS
2568 /* NAT in Gateway routers. */
2569 HMAP_FOR_EACH (od, key_node, datapaths) {
2570 if (!od->nbr) {
2571 continue;
2572 }
2573
2574 /* Packets are allowed by default. */
2575 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2576 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2577 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2578
2579 /* NAT rules are only valid on Gateway routers. */
2580 if (!smap_get(&od->nbr->options, "chassis")) {
2581 continue;
2582 }
2583
2584 for (int i = 0; i < od->nbr->n_nat; i++) {
2585 const struct nbrec_nat *nat;
2586
2587 nat = od->nbr->nat[i];
2588
2589 ovs_be32 ip, mask;
2590
2591 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2592 if (error || mask != OVS_BE32_MAX) {
2593 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2594 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2595 nat->external_ip);
2596 free(error);
2597 continue;
2598 }
2599
2600 /* Check the validity of nat->logical_ip. 'logical_ip' can
2601 * be a subnet when the type is "snat". */
2602 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2603 if (!strcmp(nat->type, "snat")) {
2604 if (error) {
2605 static struct vlog_rate_limit rl =
2606 VLOG_RATE_LIMIT_INIT(5, 1);
2607 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2608 "in router "UUID_FMT"",
2609 nat->logical_ip, UUID_ARGS(&od->key));
2610 free(error);
2611 continue;
2612 }
2613 } else {
2614 if (error || mask != OVS_BE32_MAX) {
2615 static struct vlog_rate_limit rl =
2616 VLOG_RATE_LIMIT_INIT(5, 1);
2617 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2618 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2619 free(error);
2620 continue;
2621 }
2622 }
2623
de297547
GS
2624 /* Ingress UNSNAT table: It is for already established connections'
2625 * reverse traffic. i.e., SNAT has already been done in egress
2626 * pipeline and now the packet has entered the ingress pipeline as
2627 * part of a reply. We undo the SNAT here.
2628 *
2629 * Undoing SNAT has to happen before DNAT processing. This is
2630 * because when the packet was DNATed in ingress pipeline, it did
2631 * not know about the possibility of eventual additional SNAT in
2632 * egress pipeline. */
2633 if (!strcmp(nat->type, "snat")
2634 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
2635 ds_clear(&match);
2636 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
de297547 2637 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
09b39248 2638 ds_cstr(&match), "ct_snat; next;");
de297547
GS
2639 }
2640
2641 /* Ingress DNAT table: Packets enter the pipeline with destination
2642 * IP address that needs to be DNATted from a external IP address
2643 * to a logical IP address. */
2644 if (!strcmp(nat->type, "dnat")
2645 || !strcmp(nat->type, "dnat_and_snat")) {
2646 /* Packet when it goes from the initiator to destination.
2647 * We need to zero the inport because the router can
2648 * send the packet back through the same interface. */
09b39248
JP
2649 ds_clear(&match);
2650 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2651 ds_clear(&actions);
2652 ds_put_format(&actions,"inport = \"\"; ct_dnat(%s);",
2653 nat->logical_ip);
de297547 2654 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
09b39248 2655 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
2656 }
2657
2658 /* Egress SNAT table: Packets enter the egress pipeline with
2659 * source ip address that needs to be SNATted to a external ip
2660 * address. */
2661 if (!strcmp(nat->type, "snat")
2662 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
2663 ds_clear(&match);
2664 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
2665 ds_clear(&actions);
2666 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
de297547
GS
2667
2668 /* The priority here is calculated such that the
2669 * nat->logical_ip with the longest mask gets a higher
2670 * priority. */
2671 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
09b39248
JP
2672 count_1bits(ntohl(mask)) + 1,
2673 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
2674 }
2675 }
2676
2677 /* Re-circulate every packet through the DNAT zone.
2678 * This helps with two things.
2679 *
2680 * 1. Any packet that needs to be unDNATed in the reverse
2681 * direction gets unDNATed. Ideally this could be done in
2682 * the egress pipeline. But since the gateway router
2683 * does not have any feature that depends on the source
2684 * ip address being external IP address for IP routing,
2685 * we can do it here, saving a future re-circulation.
2686 *
2687 * 2. Any packet that was sent through SNAT zone in the
2688 * previous table automatically gets re-circulated to get
2689 * back the new destination IP address that is needed for
2690 * routing in the openflow pipeline. */
2691 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2692 "ip", "inport = \"\"; ct_dnat;");
2693 }
2694
94300e09 2695 /* Logical router ingress table 4: IP Routing.
9975d7be
BP
2696 *
2697 * A packet that arrives at this table is an IP packet that should be
0bac7164
BP
2698 * routed to the address in ip4.dst. This table sets outport to the correct
2699 * output port, eth.src to the output port's MAC address, and reg0 to the
2700 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
2701 * unchanged), and advances to the next table for ARP resolution. */
9975d7be 2702 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2703 if (!op->nbrp) {
9975d7be
BP
2704 continue;
2705 }
2706
4685e523
JP
2707 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2708 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
2709 op->lrp_networks.ipv4_addrs[i].network_s,
2710 op->lrp_networks.ipv4_addrs[i].plen, NULL);
2711 }
9975d7be 2712 }
4685e523 2713
9975d7be
BP
2714 HMAP_FOR_EACH (od, key_node, datapaths) {
2715 if (!od->nbr) {
2716 continue;
2717 }
2718
28dc3fe9
SR
2719 /* Convert the static routes to flows. */
2720 for (int i = 0; i < od->nbr->n_static_routes; i++) {
2721 const struct nbrec_logical_router_static_route *route;
2722
2723 route = od->nbr->static_routes[i];
2724 build_static_route_flow(lflows, od, ports, route);
2725 }
9975d7be
BP
2726 }
2727 /* XXX destination unreachable */
2728
94300e09 2729 /* Logical router ingress table 5: ARP Resolution.
9975d7be
BP
2730 *
2731 * Any packet that reaches this table is an IP packet whose next-hop IP
2732 * address is in reg0. (ip4.dst is the final destination.) This table
2733 * resolves the IP address in reg0 into an output port in outport and an
2734 * Ethernet address in eth.dst. */
2735 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2736 if (op->nbrp) {
509afdc3
GS
2737 /* This is a logical router port. If next-hop IP address in 'reg0'
2738 * matches ip address of this router port, then the packet is
2739 * intended to eventually be sent to this logical port. Set the
2740 * destination mac address using this port's mac address.
2741 *
2742 * The packet is still in peer's logical pipeline. So the match
2743 * should be on peer's outport. */
cd150899 2744 if (op->peer && op->peer->nbrp) {
09b39248 2745 ds_clear(&match);
4685e523 2746 ds_put_format(&match, "outport == %s && reg0 == ",
cd150899 2747 op->peer->json_key);
4685e523
JP
2748 op_put_networks(&match, op, false);
2749
09b39248 2750 ds_clear(&actions);
4685e523
JP
2751 ds_put_format(&actions, "eth.dst = %s; next;",
2752 op->lrp_networks.ea_s);
cd150899 2753 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
09b39248 2754 100, ds_cstr(&match), ds_cstr(&actions));
509afdc3 2755 }
0ee00741 2756 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
2757 /* This is a logical switch port that backs a VM or a container.
2758 * Extract its addresses. For each of the address, go through all
2759 * the router ports attached to the switch (to which this port
2760 * connects) and if the address in question is reachable from the
2761 * router port, add an ARP entry in that router's pipeline. */
2762
e93b43d6 2763 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 2764 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 2765 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 2766 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 2767 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
2768 /* Get the Logical_Router_Port that the
2769 * Logical_Switch_Port is connected to, as
2770 * 'peer'. */
86e98048 2771 const char *peer_name = smap_get(
0ee00741 2772 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
2773 "router-port");
2774 if (!peer_name) {
2775 continue;
2776 }
2777
e93b43d6 2778 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 2779 if (!peer || !peer->nbrp) {
86e98048
BP
2780 continue;
2781 }
2782
4685e523 2783 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
2784 continue;
2785 }
2786
09b39248 2787 ds_clear(&match);
e93b43d6 2788 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
2789 peer->json_key, ip_s);
2790
09b39248 2791 ds_clear(&actions);
4685e523 2792 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 2793 ovn_lflow_add(lflows, peer->od,
09b39248
JP
2794 S_ROUTER_IN_ARP_RESOLVE, 100,
2795 ds_cstr(&match), ds_cstr(&actions));
86e98048 2796 }
9975d7be
BP
2797 }
2798 }
0ee00741 2799 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
2800 /* This is a logical switch port that connects to a router. */
2801
2802 /* The peer of this switch port is the router port for which
2803 * we need to add logical flows such that it can resolve
2804 * ARP entries for all the other router ports connected to
2805 * the switch in question. */
2806
0ee00741 2807 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
2808 "router-port");
2809 if (!peer_name) {
2810 continue;
2811 }
2812
2813 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 2814 if (!peer || !peer->nbrp) {
75cf9d2b
GS
2815 continue;
2816 }
2817
4685e523 2818 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 2819 const char *router_port_name = smap_get(
0ee00741 2820 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
2821 "router-port");
2822 struct ovn_port *router_port = ovn_port_find(ports,
2823 router_port_name);
0ee00741 2824 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
2825 continue;
2826 }
2827
2828 /* Skip the router port under consideration. */
2829 if (router_port == peer) {
2830 continue;
2831 }
2832
09b39248 2833 ds_clear(&match);
4685e523
JP
2834 ds_put_format(&match, "outport == %s && reg0 == ",
2835 peer->json_key);
2836 op_put_networks(&match, router_port, false);
2837
09b39248 2838 ds_clear(&actions);
4685e523
JP
2839 ds_put_format(&actions, "eth.dst = %s; next;",
2840 router_port->lrp_networks.ea_s);
75cf9d2b 2841 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
09b39248 2842 100, ds_cstr(&match), ds_cstr(&actions));
75cf9d2b 2843 }
9975d7be
BP
2844 }
2845 }
75cf9d2b 2846
0bac7164
BP
2847 HMAP_FOR_EACH (od, key_node, datapaths) {
2848 if (!od->nbr) {
2849 continue;
2850 }
2851
2852 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
2853 "get_arp(outport, reg0); next;");
2854 }
2855
94300e09 2856 /* Logical router ingress table 6: ARP request.
0bac7164
BP
2857 *
2858 * In the common case where the Ethernet destination has been resolved,
94300e09
JP
2859 * this table outputs the packet (priority 0). Otherwise, it composes
2860 * and sends an ARP request (priority 100). */
0bac7164
BP
2861 HMAP_FOR_EACH (od, key_node, datapaths) {
2862 if (!od->nbr) {
2863 continue;
2864 }
2865
2866 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
2867 "eth.dst == 00:00:00:00:00:00",
2868 "arp { "
2869 "eth.dst = ff:ff:ff:ff:ff:ff; "
2870 "arp.spa = reg1; "
2871 "arp.op = 1; " /* ARP request */
2872 "output; "
2873 "};");
2874 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
2875 }
9975d7be 2876
de297547 2877 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
2878 *
2879 * Priority 100 rules deliver packets to enabled logical ports. */
2880 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2881 if (!op->nbrp) {
9975d7be
BP
2882 continue;
2883 }
2884
0ee00741 2885 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
2886 /* Drop packets to disabled logical ports (since logical flow
2887 * tables are default-drop). */
2888 continue;
2889 }
2890
09b39248
JP
2891 ds_clear(&match);
2892 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 2893 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 2894 ds_cstr(&match), "output;");
9975d7be 2895 }
09b39248
JP
2896
2897 ds_destroy(&match);
2898 ds_destroy(&actions);
9975d7be
BP
2899}
2900
2901/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2902 * constructing their contents based on the OVN_NB database. */
2903static void
2904build_lflows(struct northd_context *ctx, struct hmap *datapaths,
2905 struct hmap *ports)
2906{
2907 struct hmap lflows = HMAP_INITIALIZER(&lflows);
2908 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
2909
2910 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
2911 build_lrouter_flows(datapaths, ports, &lflows);
2912
5868eb24
BP
2913 /* Push changes to the Logical_Flow table to database. */
2914 const struct sbrec_logical_flow *sbflow, *next_sbflow;
2915 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
2916 struct ovn_datapath *od
2917 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
2918 if (!od) {
2919 sbrec_logical_flow_delete(sbflow);
2920 continue;
eb00399e 2921 }
eb00399e 2922
9975d7be 2923 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
880fcd14
BP
2924 enum ovn_pipeline pipeline
2925 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
5868eb24 2926 struct ovn_lflow *lflow = ovn_lflow_find(
880fcd14
BP
2927 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
2928 sbflow->priority, sbflow->match, sbflow->actions);
5868eb24
BP
2929 if (lflow) {
2930 ovn_lflow_destroy(&lflows, lflow);
2931 } else {
2932 sbrec_logical_flow_delete(sbflow);
4edcdcf4
RB
2933 }
2934 }
5868eb24
BP
2935 struct ovn_lflow *lflow, *next_lflow;
2936 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
880fcd14
BP
2937 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
2938 uint8_t table = ovn_stage_get_table(lflow->stage);
2939
5868eb24
BP
2940 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
2941 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
9975d7be
BP
2942 sbrec_logical_flow_set_pipeline(
2943 sbflow, pipeline == P_IN ? "ingress" : "egress");
880fcd14 2944 sbrec_logical_flow_set_table_id(sbflow, table);
5868eb24
BP
2945 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
2946 sbrec_logical_flow_set_match(sbflow, lflow->match);
2947 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
091e3af9 2948
880fcd14
BP
2949 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
2950 ovn_stage_to_str(lflow->stage));
aaf881c6 2951 sbrec_logical_flow_set_external_ids(sbflow, &ids);
091e3af9 2952
5868eb24 2953 ovn_lflow_destroy(&lflows, lflow);
eb00399e 2954 }
5868eb24
BP
2955 hmap_destroy(&lflows);
2956
2957 /* Push changes to the Multicast_Group table to database. */
2958 const struct sbrec_multicast_group *sbmc, *next_sbmc;
2959 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
2960 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
2961 sbmc->datapath);
2962 if (!od) {
2963 sbrec_multicast_group_delete(sbmc);
2964 continue;
2965 }
eb00399e 2966
5868eb24
BP
2967 struct multicast_group group = { .name = sbmc->name,
2968 .key = sbmc->tunnel_key };
2969 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
2970 if (mc) {
2971 ovn_multicast_update_sbrec(mc, sbmc);
2972 ovn_multicast_destroy(&mcgroups, mc);
2973 } else {
2974 sbrec_multicast_group_delete(sbmc);
2975 }
2976 }
2977 struct ovn_multicast *mc, *next_mc;
2978 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
2979 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
2980 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
2981 sbrec_multicast_group_set_name(sbmc, mc->group->name);
2982 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
2983 ovn_multicast_update_sbrec(mc, sbmc);
2984 ovn_multicast_destroy(&mcgroups, mc);
4edcdcf4 2985 }
5868eb24 2986 hmap_destroy(&mcgroups);
4edcdcf4 2987}
ea382567
RB
2988
2989/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
2990 * We always update OVN_Southbound to match the current data in
2991 * OVN_Northbound, so that the address sets used in Logical_Flows in
2992 * OVN_Southbound is checked against the proper set.*/
2993static void
2994sync_address_sets(struct northd_context *ctx)
2995{
2996 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
2997
2998 const struct sbrec_address_set *sb_address_set;
2999 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
3000 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
3001 }
3002
3003 const struct nbrec_address_set *nb_address_set;
3004 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
3005 sb_address_set = shash_find_and_delete(&sb_address_sets,
3006 nb_address_set->name);
3007 if (!sb_address_set) {
3008 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
3009 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
3010 }
3011
3012 sbrec_address_set_set_addresses(sb_address_set,
3013 /* "char **" is not compatible with "const char **" */
3014 (const char **) nb_address_set->addresses,
3015 nb_address_set->n_addresses);
3016 }
3017
3018 struct shash_node *node, *next;
3019 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
3020 sbrec_address_set_delete(node->data);
3021 shash_delete(&sb_address_sets, node);
3022 }
3023 shash_destroy(&sb_address_sets);
3024}
5868eb24 3025\f
4edcdcf4 3026static void
331e7aef 3027ovnnb_db_run(struct northd_context *ctx)
4edcdcf4 3028{
331e7aef
NS
3029 if (!ctx->ovnsb_txn) {
3030 return;
3031 }
5868eb24
BP
3032 struct hmap datapaths, ports;
3033 build_datapaths(ctx, &datapaths);
3034 build_ports(ctx, &datapaths, &ports);
3035 build_lflows(ctx, &datapaths, &ports);
3036
ea382567
RB
3037 sync_address_sets(ctx);
3038
5868eb24
BP
3039 struct ovn_datapath *dp, *next_dp;
3040 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
3041 ovn_datapath_destroy(&datapaths, dp);
3042 }
3043 hmap_destroy(&datapaths);
3044
3045 struct ovn_port *port, *next_port;
3046 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
3047 ovn_port_destroy(&ports, port);
3048 }
3049 hmap_destroy(&ports);
ac0630a2
RB
3050}
3051
f93818dd
RB
3052/*
3053 * The only change we get notified about is if the 'chassis' column of the
dcda6e0d
BP
3054 * 'Port_Binding' table changes. When this column is not empty, it means we
3055 * need to set the corresponding logical port as 'up' in the northbound DB.
f93818dd 3056 */
ac0630a2 3057static void
331e7aef 3058ovnsb_db_run(struct northd_context *ctx)
ac0630a2 3059{
331e7aef
NS
3060 if (!ctx->ovnnb_txn) {
3061 return;
3062 }
fc3113bc 3063 struct hmap lports_hmap;
5868eb24 3064 const struct sbrec_port_binding *sb;
0ee00741 3065 const struct nbrec_logical_switch_port *nbsp;
fc3113bc
RB
3066
3067 struct lport_hash_node {
3068 struct hmap_node node;
0ee00741 3069 const struct nbrec_logical_switch_port *nbsp;
4ec3d7c7 3070 } *hash_node;
f93818dd 3071
fc3113bc 3072 hmap_init(&lports_hmap);
f93818dd 3073
0ee00741 3074 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
fc3113bc 3075 hash_node = xzalloc(sizeof *hash_node);
0ee00741
HK
3076 hash_node->nbsp = nbsp;
3077 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
fc3113bc
RB
3078 }
3079
5868eb24 3080 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
0ee00741 3081 nbsp = NULL;
fc3113bc 3082 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
3083 hash_string(sb->logical_port, 0),
3084 &lports_hmap) {
0ee00741
HK
3085 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
3086 nbsp = hash_node->nbsp;
fc3113bc
RB
3087 break;
3088 }
f93818dd
RB
3089 }
3090
0ee00741 3091 if (!nbsp) {
dcda6e0d 3092 /* The logical port doesn't exist for this port binding. This can
2e2762d4 3093 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 3094 * around to pruning the Port_Binding yet. */
f93818dd
RB
3095 continue;
3096 }
3097
0ee00741 3098 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
f93818dd 3099 bool up = true;
0ee00741
HK
3100 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
3101 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
f93818dd 3102 bool up = false;
0ee00741 3103 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
f93818dd
RB
3104 }
3105 }
fc3113bc 3106
4ec3d7c7 3107 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
3108 free(hash_node);
3109 }
3110 hmap_destroy(&lports_hmap);
ac0630a2
RB
3111}
3112\f
45f98d4c 3113
/* Lazily built default northbound database location; owned by this file and
 * released at the end of main(). */
static char *default_nb_db_;

/* Returns the default connection string for the OVN northbound database, a
 * unix socket named "ovnnb_db.sock" in the OVS run directory.  The string is
 * built on first use and cached for later calls. */
static const char *
default_nb_db(void)
{
    if (default_nb_db_) {
        return default_nb_db_;
    }

    default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
    return default_nb_db_;
}
3124
/* Lazily built default southbound database location; owned by this file and
 * released at the end of main(). */
static char *default_sb_db_;

/* Returns the default connection string for the OVN southbound database, a
 * unix socket named "ovnsb_db.sock" in the OVS run directory.  The string is
 * built on first use and cached for later calls. */
static const char *
default_sb_db(void)
{
    if (default_sb_db_) {
        return default_sb_db_;
    }

    default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
    return default_sb_db_;
}
3135
3136static void
3137parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
3138{
3139 enum {
67d9b930 3140 DAEMON_OPTION_ENUMS,
ac0630a2
RB
3141 VLOG_OPTION_ENUMS,
3142 };
3143 static const struct option long_options[] = {
ec78987f 3144 {"ovnsb-db", required_argument, NULL, 'd'},
ac0630a2
RB
3145 {"ovnnb-db", required_argument, NULL, 'D'},
3146 {"help", no_argument, NULL, 'h'},
3147 {"options", no_argument, NULL, 'o'},
3148 {"version", no_argument, NULL, 'V'},
67d9b930 3149 DAEMON_LONG_OPTIONS,
ac0630a2
RB
3150 VLOG_LONG_OPTIONS,
3151 STREAM_SSL_LONG_OPTIONS,
3152 {NULL, 0, NULL, 0},
3153 };
3154 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
3155
3156 for (;;) {
3157 int c;
3158
3159 c = getopt_long(argc, argv, short_options, long_options, NULL);
3160 if (c == -1) {
3161 break;
3162 }
3163
3164 switch (c) {
67d9b930 3165 DAEMON_OPTION_HANDLERS;
ac0630a2
RB
3166 VLOG_OPTION_HANDLERS;
3167 STREAM_SSL_OPTION_HANDLERS;
3168
3169 case 'd':
ec78987f 3170 ovnsb_db = optarg;
ac0630a2
RB
3171 break;
3172
3173 case 'D':
3174 ovnnb_db = optarg;
3175 break;
3176
3177 case 'h':
3178 usage();
3179 exit(EXIT_SUCCESS);
3180
3181 case 'o':
3182 ovs_cmdl_print_options(long_options);
3183 exit(EXIT_SUCCESS);
3184
3185 case 'V':
3186 ovs_print_version(0, 0);
3187 exit(EXIT_SUCCESS);
3188
3189 default:
3190 break;
3191 }
3192 }
3193
ec78987f 3194 if (!ovnsb_db) {
60bdd011 3195 ovnsb_db = default_sb_db();
ac0630a2
RB
3196 }
3197
3198 if (!ovnnb_db) {
60bdd011 3199 ovnnb_db = default_nb_db();
ac0630a2
RB
3200 }
3201
3202 free(short_options);
3203}
3204
5868eb24
BP
/* Registers 'column' with 'idl' through ovsdb_idl_add_column() and then
 * calls ovsdb_idl_omit_alert() on the same column. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
3212
ac0630a2
RB
3213int
3214main(int argc, char *argv[])
3215{
ac0630a2 3216 int res = EXIT_SUCCESS;
7b303ff9
AW
3217 struct unixctl_server *unixctl;
3218 int retval;
3219 bool exiting;
ac0630a2
RB
3220
3221 fatal_ignore_sigpipe();
3222 set_program_name(argv[0]);
485f0696 3223 service_start(&argc, &argv);
ac0630a2 3224 parse_options(argc, argv);
67d9b930 3225
e91b927d 3226 daemonize_start(false);
7b303ff9
AW
3227
3228 retval = unixctl_server_create(NULL, &unixctl);
3229 if (retval) {
3230 exit(EXIT_FAILURE);
3231 }
3232 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
3233
3234 daemonize_complete();
67d9b930 3235
ac0630a2 3236 nbrec_init();
ec78987f 3237 sbrec_init();
ac0630a2
RB
3238
3239 /* We want to detect all changes to the ovn-nb db. */
331e7aef
NS
3240 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3241 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
3242
3243 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3244 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
3245
3246 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
3247 add_column_noalert(ovnsb_idl_loop.idl,
3248 &sbrec_logical_flow_col_logical_datapath);
3249 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
3250 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
3251 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
3252 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
3253 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
3254
3255 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
3256 add_column_noalert(ovnsb_idl_loop.idl,
3257 &sbrec_multicast_group_col_datapath);
3258 add_column_noalert(ovnsb_idl_loop.idl,
3259 &sbrec_multicast_group_col_tunnel_key);
3260 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
3261 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
3262
3263 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
3264 add_column_noalert(ovnsb_idl_loop.idl,
3265 &sbrec_datapath_binding_col_tunnel_key);
3266 add_column_noalert(ovnsb_idl_loop.idl,
3267 &sbrec_datapath_binding_col_external_ids);
3268
3269 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
3270 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
3271 add_column_noalert(ovnsb_idl_loop.idl,
3272 &sbrec_port_binding_col_logical_port);
3273 add_column_noalert(ovnsb_idl_loop.idl,
3274 &sbrec_port_binding_col_tunnel_key);
3275 add_column_noalert(ovnsb_idl_loop.idl,
3276 &sbrec_port_binding_col_parent_port);
3277 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
3278 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
3279 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
3280 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
3281 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
3282
ea382567
RB
3283 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
3284 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
3285 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
3286
331e7aef 3287 /* Main loop. */
7b303ff9
AW
3288 exiting = false;
3289 while (!exiting) {
331e7aef
NS
3290 struct northd_context ctx = {
3291 .ovnnb_idl = ovnnb_idl_loop.idl,
3292 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
3293 .ovnsb_idl = ovnsb_idl_loop.idl,
3294 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
3295 };
ac0630a2 3296
8c0fae89
NS
3297 ovnnb_db_run(&ctx);
3298 ovnsb_db_run(&ctx);
f93818dd 3299
331e7aef
NS
3300 unixctl_server_run(unixctl);
3301 unixctl_server_wait(unixctl);
3302 if (exiting) {
3303 poll_immediate_wake();
ac0630a2 3304 }
331e7aef
NS
3305 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
3306 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
ac0630a2 3307
331e7aef 3308 poll_block();
485f0696
GS
3309 if (should_service_stop()) {
3310 exiting = true;
3311 }
ac0630a2
RB
3312 }
3313
7b303ff9 3314 unixctl_server_destroy(unixctl);
331e7aef
NS
3315 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
3316 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
485f0696 3317 service_stop();
ac0630a2 3318
60bdd011
RM
3319 free(default_nb_db_);
3320 free(default_sb_db_);
ac0630a2
RB
3321 exit(res);
3322}
7b303ff9
AW
3323
3324static void
3325ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
3326 const char *argv[] OVS_UNUSED, void *exiting_)
3327{
3328 bool *exiting = exiting_;
3329 *exiting = true;
3330
3331 unixctl_command_reply(conn, NULL);
3332}