]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
tests: Add a tunnel packet-out test.
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
21#include "command-line.h"
67d9b930 22#include "daemon.h"
ac0630a2 23#include "dirs.h"
3e8a2ad1 24#include "openvswitch/dynamic-string.h"
ac0630a2 25#include "fatal-signal.h"
4edcdcf4
RB
26#include "hash.h"
27#include "hmap.h"
bd39395f
BP
28#include "json.h"
29#include "ovn/lib/lex.h"
e3df8838
BP
30#include "ovn/lib/ovn-nb-idl.h"
31#include "ovn/lib/ovn-sb-idl.h"
218351dd 32#include "ovn/lib/ovn-util.h"
064d7f84 33#include "packets.h"
ac0630a2 34#include "poll-loop.h"
5868eb24 35#include "smap.h"
ac0630a2
RB
36#include "stream.h"
37#include "stream-ssl.h"
7b303ff9 38#include "unixctl.h"
ac0630a2 39#include "util.h"
4edcdcf4 40#include "uuid.h"
ac0630a2
RB
41#include "openvswitch/vlog.h"
42
2e2762d4 43VLOG_DEFINE_THIS_MODULE(ovn_northd);
ac0630a2 44
7b303ff9
AW
45static unixctl_cb_func ovn_northd_exit;
46
2e2762d4 47struct northd_context {
f93818dd 48 struct ovsdb_idl *ovnnb_idl;
ec78987f 49 struct ovsdb_idl *ovnsb_idl;
f93818dd 50 struct ovsdb_idl_txn *ovnnb_txn;
3c78b3ca 51 struct ovsdb_idl_txn *ovnsb_txn;
f93818dd
RB
52};
53
ac0630a2 54static const char *ovnnb_db;
ec78987f 55static const char *ovnsb_db;
ac0630a2 56
60bdd011
RM
57static const char *default_nb_db(void);
58static const char *default_sb_db(void);
880fcd14
BP
59\f
60/* Pipeline stages. */
ac0630a2 61
880fcd14
BP
62/* The two pipelines in an OVN logical flow table. */
63enum ovn_pipeline {
64 P_IN, /* Ingress pipeline. */
65 P_OUT /* Egress pipeline. */
66};
091e3af9 67
880fcd14
BP
68/* The two purposes for which ovn-northd uses OVN logical datapaths. */
69enum ovn_datapath_type {
70 DP_SWITCH, /* OVN logical switch. */
71 DP_ROUTER /* OVN logical router. */
091e3af9
JP
72};
73
880fcd14
BP
74/* Returns an "enum ovn_stage" built from the arguments.
75 *
76 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
77 * functions can't be used in enums or switch cases.) */
78#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
79 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
80
81/* A stage within an OVN logical switch or router.
091e3af9 82 *
880fcd14
BP
83 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
84 * or router, whether the stage is part of the ingress or egress pipeline, and
85 * the table within that pipeline. The first three components are combined to
685f4dfe 86 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
880fcd14
BP
87 * S_ROUTER_OUT_DELIVERY. */
88enum ovn_stage {
e0c9e58b
JP
89#define PIPELINE_STAGES \
90 /* Logical switch ingress stages. */ \
685f4dfe
NS
91 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
92 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
93 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
94 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
95 PIPELINE_STAGE(SWITCH, IN, ACL, 4, "ls_in_acl") \
96 PIPELINE_STAGE(SWITCH, IN, ARP_RSP, 5, "ls_in_arp_rsp") \
97 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 6, "ls_in_l2_lkup") \
e0c9e58b
JP
98 \
99 /* Logical switch egress stages. */ \
100 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
101 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
685f4dfe
NS
102 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 2, "ls_out_port_sec_ip") \
103 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 3, "ls_out_port_sec_l2") \
e0c9e58b
JP
104 \
105 /* Logical router ingress stages. */ \
106 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
107 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
108 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
0bac7164
BP
109 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 3, "lr_in_arp_resolve") \
110 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 4, "lr_in_arp_request") \
e0c9e58b
JP
111 \
112 /* Logical router egress stages. */ \
113 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")
880fcd14
BP
114
115#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
116 S_##DP_TYPE##_##PIPELINE##_##STAGE \
117 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
118 PIPELINE_STAGES
119#undef PIPELINE_STAGE
091e3af9
JP
120};
121
6bb4a18e
JP
122/* Due to various hard-coded priorities need to implement ACLs, the
123 * northbound database supports a smaller range of ACL priorities than
124 * are available to logical flows. This value is added to an ACL
125 * priority to determine the ACL's logical flow priority. */
126#define OVN_ACL_PRI_OFFSET 1000
127
880fcd14
BP
128/* Returns an "enum ovn_stage" built from the arguments. */
129static enum ovn_stage
130ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
131 uint8_t table)
132{
133 return OVN_STAGE_BUILD(dp_type, pipeline, table);
134}
135
136/* Returns the pipeline to which 'stage' belongs. */
137static enum ovn_pipeline
138ovn_stage_get_pipeline(enum ovn_stage stage)
139{
140 return (stage >> 8) & 1;
141}
142
143/* Returns the table to which 'stage' belongs. */
144static uint8_t
145ovn_stage_get_table(enum ovn_stage stage)
146{
147 return stage & 0xff;
148}
149
150/* Returns a string name for 'stage'. */
151static const char *
152ovn_stage_to_str(enum ovn_stage stage)
153{
154 switch (stage) {
155#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
156 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
157 PIPELINE_STAGES
158#undef PIPELINE_STAGE
159 default: return "<unknown>";
160 }
161}
162\f
ac0630a2
RB
163static void
164usage(void)
165{
166 printf("\
167%s: OVN northbound management daemon\n\
168usage: %s [OPTIONS]\n\
169\n\
170Options:\n\
171 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
172 (default: %s)\n\
ec78987f 173 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
ac0630a2
RB
174 (default: %s)\n\
175 -h, --help display this help message\n\
176 -o, --options list available options\n\
177 -V, --version display version information\n\
60bdd011 178", program_name, program_name, default_nb_db(), default_sb_db());
67d9b930 179 daemon_usage();
ac0630a2
RB
180 vlog_usage();
181 stream_usage("database", true, true, false);
182}
183\f
5868eb24
BP
184struct tnlid_node {
185 struct hmap_node hmap_node;
186 uint32_t tnlid;
187};
188
189static void
190destroy_tnlids(struct hmap *tnlids)
4edcdcf4 191{
4ec3d7c7
DDP
192 struct tnlid_node *node;
193 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
194 free(node);
195 }
196 hmap_destroy(tnlids);
197}
198
199static void
200add_tnlid(struct hmap *set, uint32_t tnlid)
201{
202 struct tnlid_node *node = xmalloc(sizeof *node);
203 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
204 node->tnlid = tnlid;
4edcdcf4
RB
205}
206
4edcdcf4 207static bool
5868eb24 208tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 209{
5868eb24
BP
210 const struct tnlid_node *node;
211 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
212 if (node->tnlid == tnlid) {
213 return true;
214 }
215 }
216 return false;
217}
4edcdcf4 218
5868eb24
BP
219static uint32_t
220allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
221 uint32_t *hint)
222{
223 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
224 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
225 if (!tnlid_in_use(set, tnlid)) {
226 add_tnlid(set, tnlid);
227 *hint = tnlid;
228 return tnlid;
229 }
4edcdcf4
RB
230 }
231
5868eb24
BP
232 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
233 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
234 return 0;
235}
236\f
9975d7be
BP
237/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
238 * sb->external_ids:logical-switch. */
5868eb24
BP
239struct ovn_datapath {
240 struct hmap_node key_node; /* Index on 'key'. */
9975d7be 241 struct uuid key; /* (nbs/nbr)->header_.uuid. */
4edcdcf4 242
9975d7be
BP
243 const struct nbrec_logical_switch *nbs; /* May be NULL. */
244 const struct nbrec_logical_router *nbr; /* May be NULL. */
5868eb24 245 const struct sbrec_datapath_binding *sb; /* May be NULL. */
4edcdcf4 246
5868eb24 247 struct ovs_list list; /* In list of similar records. */
4edcdcf4 248
9975d7be 249 /* Logical router data (digested from nbr). */
0bac7164 250 const struct ovn_port *gateway_port;
9975d7be
BP
251 ovs_be32 gateway;
252
253 /* Logical switch data. */
86e98048
BP
254 struct ovn_port **router_ports;
255 size_t n_router_ports;
9975d7be 256
5868eb24
BP
257 struct hmap port_tnlids;
258 uint32_t port_key_hint;
259
260 bool has_unknown;
261};
262
263static struct ovn_datapath *
264ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
9975d7be
BP
265 const struct nbrec_logical_switch *nbs,
266 const struct nbrec_logical_router *nbr,
5868eb24
BP
267 const struct sbrec_datapath_binding *sb)
268{
269 struct ovn_datapath *od = xzalloc(sizeof *od);
270 od->key = *key;
271 od->sb = sb;
9975d7be
BP
272 od->nbs = nbs;
273 od->nbr = nbr;
5868eb24
BP
274 hmap_init(&od->port_tnlids);
275 od->port_key_hint = 0;
276 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
277 return od;
278}
279
280static void
281ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
282{
283 if (od) {
284 /* Don't remove od->list. It is used within build_datapaths() as a
285 * private list and once we've exited that function it is not safe to
286 * use it. */
287 hmap_remove(datapaths, &od->key_node);
288 destroy_tnlids(&od->port_tnlids);
86e98048 289 free(od->router_ports);
5868eb24
BP
290 free(od);
291 }
292}
293
294static struct ovn_datapath *
295ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
296{
297 struct ovn_datapath *od;
298
299 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
300 if (uuid_equals(uuid, &od->key)) {
301 return od;
302 }
303 }
304 return NULL;
305}
306
307static struct ovn_datapath *
308ovn_datapath_from_sbrec(struct hmap *datapaths,
309 const struct sbrec_datapath_binding *sb)
310{
311 struct uuid key;
312
9975d7be
BP
313 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
314 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
315 return NULL;
316 }
317 return ovn_datapath_find(datapaths, &key);
318}
319
5412db30
J
320static bool
321lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
322{
323 return !lrouter->enabled || *lrouter->enabled;
324}
325
5868eb24
BP
326static void
327join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
328 struct ovs_list *sb_only, struct ovs_list *nb_only,
329 struct ovs_list *both)
330{
331 hmap_init(datapaths);
417e7e66
BW
332 ovs_list_init(sb_only);
333 ovs_list_init(nb_only);
334 ovs_list_init(both);
5868eb24
BP
335
336 const struct sbrec_datapath_binding *sb, *sb_next;
337 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
338 struct uuid key;
9975d7be
BP
339 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
340 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
341 ovsdb_idl_txn_add_comment(
342 ctx->ovnsb_txn,
343 "deleting Datapath_Binding "UUID_FMT" that lacks "
344 "external-ids:logical-switch and "
345 "external-ids:logical-router",
346 UUID_ARGS(&sb->header_.uuid));
5868eb24
BP
347 sbrec_datapath_binding_delete(sb);
348 continue;
349 }
350
351 if (ovn_datapath_find(datapaths, &key)) {
352 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
9975d7be
BP
353 VLOG_INFO_RL(
354 &rl, "deleting Datapath_Binding "UUID_FMT" with "
355 "duplicate external-ids:logical-switch/router "UUID_FMT,
356 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
5868eb24
BP
357 sbrec_datapath_binding_delete(sb);
358 continue;
359 }
360
361 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
9975d7be 362 NULL, NULL, sb);
417e7e66 363 ovs_list_push_back(sb_only, &od->list);
5868eb24
BP
364 }
365
9975d7be
BP
366 const struct nbrec_logical_switch *nbs;
367 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
5868eb24 368 struct ovn_datapath *od = ovn_datapath_find(datapaths,
9975d7be 369 &nbs->header_.uuid);
5868eb24 370 if (od) {
9975d7be 371 od->nbs = nbs;
417e7e66
BW
372 ovs_list_remove(&od->list);
373 ovs_list_push_back(both, &od->list);
5868eb24 374 } else {
9975d7be
BP
375 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
376 nbs, NULL, NULL);
417e7e66 377 ovs_list_push_back(nb_only, &od->list);
5868eb24
BP
378 }
379 }
9975d7be
BP
380
381 const struct nbrec_logical_router *nbr;
382 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
5412db30
J
383 if (!lrouter_is_enabled(nbr)) {
384 continue;
385 }
386
9975d7be
BP
387 struct ovn_datapath *od = ovn_datapath_find(datapaths,
388 &nbr->header_.uuid);
389 if (od) {
390 if (!od->nbs) {
391 od->nbr = nbr;
417e7e66
BW
392 ovs_list_remove(&od->list);
393 ovs_list_push_back(both, &od->list);
9975d7be
BP
394 } else {
395 /* Can't happen! */
396 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
397 VLOG_WARN_RL(&rl,
398 "duplicate UUID "UUID_FMT" in OVN_Northbound",
399 UUID_ARGS(&nbr->header_.uuid));
400 continue;
401 }
402 } else {
403 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
404 NULL, nbr, NULL);
417e7e66 405 ovs_list_push_back(nb_only, &od->list);
9975d7be
BP
406 }
407
408 od->gateway = 0;
409 if (nbr->default_gw) {
0bac7164
BP
410 ovs_be32 ip;
411 if (!ip_parse(nbr->default_gw, &ip) || !ip) {
412 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
9975d7be 413 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
9975d7be
BP
414 } else {
415 od->gateway = ip;
416 }
417 }
0bac7164
BP
418
419 /* Set the gateway port to NULL. If there is a gateway, it will get
420 * filled in as we go through the ports later. */
421 od->gateway_port = NULL;
9975d7be 422 }
5868eb24
BP
423}
424
425static uint32_t
426ovn_datapath_allocate_key(struct hmap *dp_tnlids)
427{
428 static uint32_t hint;
429 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
430}
431
0bac7164
BP
432/* Updates the southbound Datapath_Binding table so that it contains the
433 * logical switches and routers specified by the northbound database.
434 *
435 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
436 * switch and router. */
5868eb24
BP
437static void
438build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
439{
440 struct ovs_list sb_only, nb_only, both;
441
442 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
443
417e7e66 444 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
445 /* First index the in-use datapath tunnel IDs. */
446 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
447 struct ovn_datapath *od;
448 LIST_FOR_EACH (od, list, &both) {
449 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
450 }
451
452 /* Add southbound record for each unmatched northbound record. */
453 LIST_FOR_EACH (od, list, &nb_only) {
454 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
455 if (!tunnel_key) {
456 break;
457 }
458
459 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
460
5868eb24 461 char uuid_s[UUID_LEN + 1];
9975d7be
BP
462 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
463 const char *key = od->nbs ? "logical-switch" : "logical-router";
464 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
aaf881c6 465 sbrec_datapath_binding_set_external_ids(od->sb, &id);
5868eb24
BP
466
467 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
468 }
469 destroy_tnlids(&dp_tnlids);
470 }
471
472 /* Delete southbound records without northbound matches. */
473 struct ovn_datapath *od, *next;
474 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 475 ovs_list_remove(&od->list);
5868eb24
BP
476 sbrec_datapath_binding_delete(od->sb);
477 ovn_datapath_destroy(datapaths, od);
478 }
479}
480\f
481struct ovn_port {
482 struct hmap_node key_node; /* Index on 'key'. */
9975d7be
BP
483 char *key; /* nbs->name, nbr->name, sb->logical_port. */
484 char *json_key; /* 'key', quoted for use in JSON. */
5868eb24 485
9975d7be
BP
486 const struct nbrec_logical_port *nbs; /* May be NULL. */
487 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
488 const struct sbrec_port_binding *sb; /* May be NULL. */
489
490 /* Logical router port data. */
491 ovs_be32 ip, mask; /* 192.168.10.123/24. */
492 ovs_be32 network; /* 192.168.10.0. */
493 ovs_be32 bcast; /* 192.168.10.255. */
494 struct eth_addr mac;
495 struct ovn_port *peer;
5868eb24
BP
496
497 struct ovn_datapath *od;
498
499 struct ovs_list list; /* In list of similar records. */
500};
501
502static struct ovn_port *
503ovn_port_create(struct hmap *ports, const char *key,
9975d7be
BP
504 const struct nbrec_logical_port *nbs,
505 const struct nbrec_logical_router_port *nbr,
5868eb24
BP
506 const struct sbrec_port_binding *sb)
507{
508 struct ovn_port *op = xzalloc(sizeof *op);
9975d7be
BP
509
510 struct ds json_key = DS_EMPTY_INITIALIZER;
511 json_string_escape(key, &json_key);
512 op->json_key = ds_steal_cstr(&json_key);
513
514 op->key = xstrdup(key);
5868eb24 515 op->sb = sb;
9975d7be
BP
516 op->nbs = nbs;
517 op->nbr = nbr;
5868eb24
BP
518 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
519 return op;
520}
521
522static void
523ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
524{
525 if (port) {
526 /* Don't remove port->list. It is used within build_ports() as a
527 * private list and once we've exited that function it is not safe to
528 * use it. */
529 hmap_remove(ports, &port->key_node);
9975d7be
BP
530 free(port->json_key);
531 free(port->key);
5868eb24
BP
532 free(port);
533 }
534}
535
536static struct ovn_port *
537ovn_port_find(struct hmap *ports, const char *name)
538{
539 struct ovn_port *op;
540
541 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
542 if (!strcmp(op->key, name)) {
543 return op;
544 }
545 }
546 return NULL;
547}
548
549static uint32_t
550ovn_port_allocate_key(struct ovn_datapath *od)
551{
552 return allocate_tnlid(&od->port_tnlids, "port",
553 (1u << 15) - 1, &od->port_key_hint);
554}
555
556static void
557join_logical_ports(struct northd_context *ctx,
558 struct hmap *datapaths, struct hmap *ports,
559 struct ovs_list *sb_only, struct ovs_list *nb_only,
560 struct ovs_list *both)
561{
562 hmap_init(ports);
417e7e66
BW
563 ovs_list_init(sb_only);
564 ovs_list_init(nb_only);
565 ovs_list_init(both);
5868eb24
BP
566
567 const struct sbrec_port_binding *sb;
568 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
569 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
9975d7be 570 NULL, NULL, sb);
417e7e66 571 ovs_list_push_back(sb_only, &op->list);
5868eb24
BP
572 }
573
574 struct ovn_datapath *od;
575 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
576 if (od->nbs) {
577 for (size_t i = 0; i < od->nbs->n_ports; i++) {
578 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
579 struct ovn_port *op = ovn_port_find(ports, nbs->name);
580 if (op) {
581 if (op->nbs || op->nbr) {
582 static struct vlog_rate_limit rl
583 = VLOG_RATE_LIMIT_INIT(5, 1);
584 VLOG_WARN_RL(&rl, "duplicate logical port %s",
585 nbs->name);
586 continue;
587 }
588 op->nbs = nbs;
417e7e66
BW
589 ovs_list_remove(&op->list);
590 ovs_list_push_back(both, &op->list);
9975d7be
BP
591 } else {
592 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
417e7e66 593 ovs_list_push_back(nb_only, &op->list);
9975d7be
BP
594 }
595
596 op->od = od;
597 }
598 } else {
599 for (size_t i = 0; i < od->nbr->n_ports; i++) {
600 const struct nbrec_logical_router_port *nbr
601 = od->nbr->ports[i];
602
603 struct eth_addr mac;
604 if (!eth_addr_from_string(nbr->mac, &mac)) {
605 static struct vlog_rate_limit rl
606 = VLOG_RATE_LIMIT_INIT(5, 1);
607 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
608 continue;
609 }
610
611 ovs_be32 ip, mask;
612 char *error = ip_parse_masked(nbr->network, &ip, &mask);
613 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
614 static struct vlog_rate_limit rl
615 = VLOG_RATE_LIMIT_INIT(5, 1);
616 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
617 free(error);
618 continue;
619 }
620
00007447 621 struct ovn_port *op = ovn_port_find(ports, nbr->name);
9975d7be
BP
622 if (op) {
623 if (op->nbs || op->nbr) {
624 static struct vlog_rate_limit rl
625 = VLOG_RATE_LIMIT_INIT(5, 1);
626 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
00007447 627 nbr->name);
9975d7be
BP
628 continue;
629 }
630 op->nbr = nbr;
417e7e66
BW
631 ovs_list_remove(&op->list);
632 ovs_list_push_back(both, &op->list);
9975d7be 633 } else {
00007447 634 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
417e7e66 635 ovs_list_push_back(nb_only, &op->list);
9975d7be
BP
636 }
637
638 op->ip = ip;
639 op->mask = mask;
640 op->network = ip & mask;
641 op->bcast = ip | ~mask;
642 op->mac = mac;
643
644 op->od = od;
0bac7164
BP
645
646 /* If 'od' has a gateway and 'op' routes to it... */
647 if (od->gateway && !((op->network ^ od->gateway) & op->mask)) {
648 /* ...and if 'op' is a longer match than the current
649 * choice... */
650 const struct ovn_port *gw = od->gateway_port;
651 int len = gw ? ip_count_cidr_bits(gw->mask) : 0;
652 if (ip_count_cidr_bits(op->mask) > len) {
653 /* ...then it's the default gateway port. */
654 od->gateway_port = op;
655 }
656 }
5868eb24 657 }
9975d7be
BP
658 }
659 }
660
661 /* Connect logical router ports, and logical switch ports of type "router",
662 * to their peers. */
663 struct ovn_port *op;
664 HMAP_FOR_EACH (op, key_node, ports) {
665 if (op->nbs && !strcmp(op->nbs->type, "router")) {
666 const char *peer_name = smap_get(&op->nbs->options, "router-port");
667 if (!peer_name) {
668 continue;
669 }
670
671 struct ovn_port *peer = ovn_port_find(ports, peer_name);
672 if (!peer || !peer->nbr) {
673 continue;
674 }
675
676 peer->peer = op;
677 op->peer = peer;
86e98048
BP
678 op->od->router_ports = xrealloc(
679 op->od->router_ports,
680 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
681 op->od->router_ports[op->od->n_router_ports++] = op;
9975d7be 682 } else if (op->nbr && op->nbr->peer) {
509afdc3 683 op->peer = ovn_port_find(ports, op->nbr->peer);
5868eb24
BP
684 }
685 }
686}
687
688static void
689ovn_port_update_sbrec(const struct ovn_port *op)
690{
691 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
9975d7be
BP
692 if (op->nbr) {
693 sbrec_port_binding_set_type(op->sb, "patch");
694
695 const char *peer = op->peer ? op->peer->key : "<error>";
696 const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
697 sbrec_port_binding_set_options(op->sb, &ids);
698
699 sbrec_port_binding_set_parent_port(op->sb, NULL);
700 sbrec_port_binding_set_tag(op->sb, NULL, 0);
701 sbrec_port_binding_set_mac(op->sb, NULL, 0);
702 } else {
703 if (strcmp(op->nbs->type, "router")) {
704 sbrec_port_binding_set_type(op->sb, op->nbs->type);
705 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
706 } else {
707 sbrec_port_binding_set_type(op->sb, "patch");
708
709 const char *router_port = smap_get(&op->nbs->options,
710 "router-port");
711 if (!router_port) {
712 router_port = "<error>";
713 }
714 const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
715 sbrec_port_binding_set_options(op->sb, &ids);
716 }
717 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
718 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
719 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
720 op->nbs->n_addresses);
721 }
5868eb24
BP
722}
723
0bac7164
BP
724/* Updates the southbound Port_Binding table so that it contains the logical
725 * ports specified by the northbound database.
726 *
727 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
728 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
729 * datapaths. */
5868eb24
BP
730static void
731build_ports(struct northd_context *ctx, struct hmap *datapaths,
732 struct hmap *ports)
733{
734 struct ovs_list sb_only, nb_only, both;
735
736 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
737
738 /* For logical ports that are in both databases, update the southbound
739 * record based on northbound data. Also index the in-use tunnel_keys. */
740 struct ovn_port *op, *next;
741 LIST_FOR_EACH_SAFE (op, next, list, &both) {
742 ovn_port_update_sbrec(op);
743
744 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
745 if (op->sb->tunnel_key > op->od->port_key_hint) {
746 op->od->port_key_hint = op->sb->tunnel_key;
747 }
748 }
749
750 /* Add southbound record for each unmatched northbound record. */
751 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
752 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
753 if (!tunnel_key) {
754 continue;
755 }
756
757 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
758 ovn_port_update_sbrec(op);
759
760 sbrec_port_binding_set_logical_port(op->sb, op->key);
761 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
762 }
763
764 /* Delete southbound records without northbound matches. */
765 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 766 ovs_list_remove(&op->list);
5868eb24
BP
767 sbrec_port_binding_delete(op->sb);
768 ovn_port_destroy(ports, op);
769 }
770}
771\f
772#define OVN_MIN_MULTICAST 32768
773#define OVN_MAX_MULTICAST 65535
774
775struct multicast_group {
776 const char *name;
777 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
778};
779
780#define MC_FLOOD "_MC_flood"
781static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
782
783#define MC_UNKNOWN "_MC_unknown"
784static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
785
786static bool
787multicast_group_equal(const struct multicast_group *a,
788 const struct multicast_group *b)
789{
790 return !strcmp(a->name, b->name) && a->key == b->key;
791}
792
793/* Multicast group entry. */
794struct ovn_multicast {
795 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
796 struct ovn_datapath *datapath;
797 const struct multicast_group *group;
798
799 struct ovn_port **ports;
800 size_t n_ports, allocated_ports;
801};
802
803static uint32_t
804ovn_multicast_hash(const struct ovn_datapath *datapath,
805 const struct multicast_group *group)
806{
807 return hash_pointer(datapath, group->key);
808}
809
810static struct ovn_multicast *
811ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
812 const struct multicast_group *group)
813{
814 struct ovn_multicast *mc;
815
816 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
817 ovn_multicast_hash(datapath, group), mcgroups) {
818 if (mc->datapath == datapath
819 && multicast_group_equal(mc->group, group)) {
820 return mc;
4edcdcf4
RB
821 }
822 }
5868eb24
BP
823 return NULL;
824}
825
826static void
827ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
828 struct ovn_port *port)
829{
830 struct ovn_datapath *od = port->od;
831 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
832 if (!mc) {
833 mc = xmalloc(sizeof *mc);
834 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
835 mc->datapath = od;
836 mc->group = group;
837 mc->n_ports = 0;
838 mc->allocated_ports = 4;
839 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
840 }
841 if (mc->n_ports >= mc->allocated_ports) {
842 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
843 sizeof *mc->ports);
844 }
845 mc->ports[mc->n_ports++] = port;
846}
4edcdcf4 847
5868eb24
BP
848static void
849ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
850{
851 if (mc) {
852 hmap_remove(mcgroups, &mc->hmap_node);
853 free(mc->ports);
854 free(mc);
855 }
856}
4edcdcf4 857
5868eb24
BP
858static void
859ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
860 const struct sbrec_multicast_group *sb)
861{
862 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
863 for (size_t i = 0; i < mc->n_ports; i++) {
864 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
865 }
866 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
867 free(ports);
4edcdcf4 868}
bd39395f 869\f
48605550 870/* Logical flow generation.
bd39395f 871 *
48605550 872 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
873 * function of most of the northbound database.
874 */
875
5868eb24
BP
876struct ovn_lflow {
877 struct hmap_node hmap_node;
bd39395f 878
5868eb24 879 struct ovn_datapath *od;
880fcd14 880 enum ovn_stage stage;
5868eb24
BP
881 uint16_t priority;
882 char *match;
883 char *actions;
bd39395f
BP
884};
885
886static size_t
5868eb24 887ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 888{
5868eb24 889 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 890 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
891 hash = hash_string(lflow->match, hash);
892 return hash_string(lflow->actions, hash);
bd39395f
BP
893}
894
5868eb24
BP
895static bool
896ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
897{
898 return (a->od == b->od
880fcd14 899 && a->stage == b->stage
5868eb24
BP
900 && a->priority == b->priority
901 && !strcmp(a->match, b->match)
902 && !strcmp(a->actions, b->actions));
903}
904
905static void
906ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
880fcd14 907 enum ovn_stage stage, uint16_t priority,
5868eb24 908 char *match, char *actions)
bd39395f 909{
5868eb24 910 lflow->od = od;
880fcd14 911 lflow->stage = stage;
5868eb24
BP
912 lflow->priority = priority;
913 lflow->match = match;
914 lflow->actions = actions;
bd39395f
BP
915}
916
48605550 917/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 918static void
5868eb24 919ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
880fcd14 920 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
921 const char *match, const char *actions)
922{
923 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 924 ovn_lflow_init(lflow, od, stage, priority,
5868eb24
BP
925 xstrdup(match), xstrdup(actions));
926 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
927}
928
929static struct ovn_lflow *
930ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 931 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
932 const char *match, const char *actions)
933{
934 struct ovn_lflow target;
880fcd14 935 ovn_lflow_init(&target, od, stage, priority,
5868eb24
BP
936 CONST_CAST(char *, match), CONST_CAST(char *, actions));
937
938 struct ovn_lflow *lflow;
939 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
940 lflows) {
941 if (ovn_lflow_equal(lflow, &target)) {
942 return lflow;
bd39395f
BP
943 }
944 }
5868eb24
BP
945 return NULL;
946}
bd39395f 947
5868eb24
BP
948static void
949ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
950{
951 if (lflow) {
952 hmap_remove(lflows, &lflow->hmap_node);
953 free(lflow->match);
954 free(lflow->actions);
955 free(lflow);
956 }
bd39395f
BP
957}
958
bd39395f
BP
959/* Appends port security constraints on L2 address field 'eth_addr_field'
960 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
961 * 'n_port_security' elements, is the collection of port_security constraints
f7cb14cd 962 * from an OVN_NB Logical_Port row. */
bd39395f 963static void
685f4dfe
NS
964build_port_security_l2(const char *eth_addr_field,
965 char **port_security, size_t n_port_security,
966 struct ds *match)
bd39395f
BP
967{
968 size_t base_len = match->length;
969 ds_put_format(match, " && %s == {", eth_addr_field);
970
971 size_t n = 0;
972 for (size_t i = 0; i < n_port_security; i++) {
74ff3298 973 struct eth_addr ea;
f7cb14cd 974
74ff3298 975 if (eth_addr_from_string(port_security[i], &ea)) {
f7cb14cd 976 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
bd39395f
BP
977 ds_put_char(match, ' ');
978 n++;
979 }
980 }
f7cb14cd 981 ds_chomp(match, ' ');
bd39395f 982 ds_put_cstr(match, "}");
4edcdcf4 983
bd39395f
BP
984 if (!n) {
985 match->length = base_len;
986 }
987}
988
685f4dfe
NS
989static void
990build_port_security_ipv6_nd_flow(
991 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
992 int n_ipv6_addrs)
993{
994 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
995 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
996 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
997 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
998 ETH_ADDR_ARGS(ea));
999 if (!n_ipv6_addrs) {
1000 ds_put_cstr(match, "))");
1001 return;
1002 }
1003
1004 char ip6_str[INET6_ADDRSTRLEN + 1];
1005 struct in6_addr lla;
1006 in6_generate_lla(ea, &lla);
1007 memset(ip6_str, 0, sizeof(ip6_str));
1008 ipv6_string_mapped(ip6_str, &lla);
1009 ds_put_format(match, " && (nd.target == %s", ip6_str);
1010
1011 for(int i = 0; i < n_ipv6_addrs; i++) {
1012 memset(ip6_str, 0, sizeof(ip6_str));
1013 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1014 ds_put_format(match, " || nd.target == %s", ip6_str);
1015 }
1016
1017 ds_put_format(match, ")))");
1018}
1019
1020static void
1021build_port_security_ipv6_flow(
1022 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1023 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1024{
1025 char ip6_str[INET6_ADDRSTRLEN + 1];
1026
1027 ds_put_format(match, " && %s == {",
1028 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1029
1030 /* Allow link-local address. */
1031 struct in6_addr lla;
1032 in6_generate_lla(ea, &lla);
1033 ipv6_string_mapped(ip6_str, &lla);
1034 ds_put_format(match, "%s, ", ip6_str);
1035
1036 /* Allow ip6.src=:: and ip6.dst=ff00::/8 for ND packets */
1037 ds_put_cstr(match, pipeline == P_IN ? "::" : "ff00::/8");
1038 for(int i = 0; i < n_ipv6_addrs; i++) {
1039 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1040 ds_put_format(match, ", %s", ip6_str);
1041 }
1042 ds_put_cstr(match, "}");
1043}
1044
1045/**
1046 * Build port security constraints on ARP and IPv6 ND fields
1047 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1048 *
1049 * For each port security of the logical port, following
1050 * logical flows are added
1051 * - If the port security has no IP (both IPv4 and IPv6) or
1052 * if it has IPv4 address(es)
1053 * - Priority 90 flow to allow ARP packets for known MAC addresses
1054 * in the eth.src and arp.spa fields. If the port security
1055 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
1056 *
1057 * - If the port security has no IP (both IPv4 and IPv6) or
1058 * if it has IPv6 address(es)
1059 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1060 * in the eth.src and nd.sll/nd.tll fields. If the port security
1061 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1062 * for IPv6 Neighbor Advertisement packet.
1063 *
1064 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1065 */
1066static void
1067build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
1068{
1069 for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1070 struct lport_addresses ps;
1071 if (!extract_lport_addresses(op->nbs->port_security[i], &ps, true)) {
1072 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1073 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port security. No MAC"
1074 " address found", op->nbs->port_security[i]);
1075 continue;
1076 }
1077
1078 bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
1079 struct ds match = DS_EMPTY_INITIALIZER;
1080
1081 if (ps.n_ipv4_addrs || no_ip) {
1082 ds_put_format(
1083 &match, "inport == %s && eth.src == "ETH_ADDR_FMT" && arp.sha == "
1084 ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
1085 ETH_ADDR_ARGS(ps.ea));
1086
1087 if (ps.n_ipv4_addrs) {
1088 ds_put_cstr(&match, " && (");
1089 for (size_t i = 0; i < ps.n_ipv4_addrs; i++) {
7d9d86ad
NS
1090 ds_put_cstr(&match, "arp.spa == ");
1091 ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1092 /* When the netmask is applied, if the host portion is
1093 * non-zero, the host can only use the specified
1094 * address in the arp.spa. If zero, the host is allowed
1095 * to use any address in the subnet. */
1096 if (ps.ipv4_addrs[i].addr & ~mask) {
1097 ds_put_format(&match, IP_FMT,
1098 IP_ARGS(ps.ipv4_addrs[i].addr));
1099 } else {
1100 ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1101 &match);
1102 }
1103 ds_put_cstr(&match, " || ");
685f4dfe
NS
1104 }
1105 ds_chomp(&match, ' ');
1106 ds_chomp(&match, '|');
1107 ds_chomp(&match, '|');
1108 ds_put_cstr(&match, ")");
1109 }
1110 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1111 ds_cstr(&match), "next;");
1112 ds_destroy(&match);
1113 }
1114
1115 if (ps.n_ipv6_addrs || no_ip) {
1116 ds_init(&match);
1117 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT,
1118 op->json_key, ETH_ADDR_ARGS(ps.ea));
1119 build_port_security_ipv6_nd_flow(&match, ps.ea, ps.ipv6_addrs,
1120 ps.n_ipv6_addrs);
1121 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1122 ds_cstr(&match), "next;");
1123 ds_destroy(&match);
1124 }
1125 free(ps.ipv4_addrs);
1126 free(ps.ipv6_addrs);
1127 }
1128
1129 char *match = xasprintf("inport == %s && (arp || nd)", op->json_key);
1130 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
1131 match, "drop;");
1132 free(match);
1133}
1134
1135/**
1136 * Build port security constraints on IPv4 and IPv6 src and dst fields
1137 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1138 *
1139 * For each port security of the logical port, following
1140 * logical flows are added
1141 * - If the port security has IPv4 addresses,
1142 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1143 *
1144 * - If the port security has IPv6 addresses,
1145 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1146 *
1147 * - If the port security has IPv4 addresses or IPv6 addresses or both
1148 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
1149 */
1150static void
1151build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
1152 struct hmap *lflows)
1153{
1154 char *port_direction;
1155 enum ovn_stage stage;
1156 if (pipeline == P_IN) {
1157 port_direction = "inport";
1158 stage = S_SWITCH_IN_PORT_SEC_IP;
1159 } else {
1160 port_direction = "outport";
1161 stage = S_SWITCH_OUT_PORT_SEC_IP;
1162 }
1163
1164 for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1165 struct lport_addresses ps;
1166 if (!extract_lport_addresses(op->nbs->port_security[i], &ps, true)) {
1167 continue;
1168 }
1169
1170 if (!(ps.n_ipv4_addrs || ps.n_ipv6_addrs)) {
1171 continue;
1172 }
1173
1174 if (ps.n_ipv4_addrs) {
1175 struct ds match = DS_EMPTY_INITIALIZER;
1176 if (pipeline == P_IN) {
1177 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT
1178 " && ip4.src == {0.0.0.0, ", op->json_key,
1179 ETH_ADDR_ARGS(ps.ea));
1180 } else {
1181 ds_put_format(&match, "outport == %s && eth.dst == "ETH_ADDR_FMT
1182 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
1183 op->json_key, ETH_ADDR_ARGS(ps.ea));
1184 }
1185
1186 for (int i = 0; i < ps.n_ipv4_addrs; i++) {
7d9d86ad
NS
1187 ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1188 /* When the netmask is applied, if the host portion is
1189 * non-zero, the host can only use the specified
1190 * address. If zero, the host is allowed to use any
1191 * address in the subnet.
1192 * */
1193 if (ps.ipv4_addrs[i].addr & ~mask) {
1194 ds_put_format(&match, IP_FMT,
1195 IP_ARGS(ps.ipv4_addrs[i].addr));
1196 if (pipeline == P_OUT && ps.ipv4_addrs[i].plen != 32) {
1197 /* Host is also allowed to receive packets to the
1198 * broadcast address in the specified subnet.
1199 */
1200 ds_put_format(&match, ", "IP_FMT,
1201 IP_ARGS(ps.ipv4_addrs[i].addr | ~mask));
1202 }
1203 } else {
1204 /* host portion is zero */
1205 ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1206 &match);
1207 }
1208 ds_put_cstr(&match, ", ");
685f4dfe
NS
1209 }
1210
1211 /* Replace ", " by "}". */
1212 ds_chomp(&match, ' ');
1213 ds_chomp(&match, ',');
1214 ds_put_cstr(&match, "}");
1215 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
1216 ds_destroy(&match);
1217 free(ps.ipv4_addrs);
1218 }
1219
1220 if (ps.n_ipv6_addrs) {
1221 struct ds match = DS_EMPTY_INITIALIZER;
1222 ds_put_format(&match, "%s == %s && %s == "ETH_ADDR_FMT"",
1223 port_direction, op->json_key,
1224 pipeline == P_IN ? "eth.src" : "eth.dst",
1225 ETH_ADDR_ARGS(ps.ea));
1226 build_port_security_ipv6_flow(pipeline, &match, ps.ea,
1227 ps.ipv6_addrs, ps.n_ipv6_addrs);
1228 ovn_lflow_add(lflows, op->od, stage, 90,
1229 ds_cstr(&match), "next;");
1230 ds_destroy(&match);
1231 free(ps.ipv6_addrs);
1232 }
1233
1234 char *match = xasprintf(
1235 "%s == %s && %s == "ETH_ADDR_FMT" && ip", port_direction,
1236 op->json_key, pipeline == P_IN ? "eth.src" : "eth.dst",
1237 ETH_ADDR_ARGS(ps.ea));
1238 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
1239 free(match);
1240 }
1241}
1242
95a9a275
RB
1243static bool
1244lport_is_enabled(const struct nbrec_logical_port *lport)
1245{
1246 return !lport->enabled || *lport->enabled;
1247}
1248
4c7bf534
NS
1249static bool
1250lport_is_up(const struct nbrec_logical_port *lport)
1251{
1252 return !lport->up || *lport->up;
1253}
1254
78aab811
JP
1255static bool
1256has_stateful_acl(struct ovn_datapath *od)
1257{
9975d7be
BP
1258 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1259 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
1260 if (!strcmp(acl->action, "allow-related")) {
1261 return true;
1262 }
1263 }
1264
1265 return false;
1266}
1267
1268static void
48fcdb47 1269build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
78aab811
JP
1270{
1271 bool has_stateful = has_stateful_acl(od);
48fcdb47 1272 struct ovn_port *op;
78aab811
JP
1273
1274 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1275 * allowed by default. */
880fcd14
BP
1276 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1277 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
78aab811
JP
1278
1279 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1280 * default. A related rule at priority 1 is added below if there
1281 * are any stateful ACLs in this datapath. */
880fcd14
BP
1282 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1283 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
78aab811
JP
1284
1285 /* If there are any stateful ACL rules in this dapapath, we must
1286 * send all IP packets through the conntrack action, which handles
1287 * defragmentation, in order to match L4 headers. */
1288 if (has_stateful) {
48fcdb47
WL
1289 HMAP_FOR_EACH (op, key_node, ports) {
1290 if (op->od == od && !strcmp(op->nbs->type, "router")) {
501f95e1
JP
1291 /* Can't use ct() for router ports. Consider the
1292 * following configuration: lp1(10.0.0.2) on
1293 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
1294 * ping from lp1 to lp2, First, the response will go
1295 * through ct() with a zone for lp2 in the ls2 ingress
1296 * pipeline on hostB. That ct zone knows about this
1297 * connection. Next, it goes through ct() with the zone
1298 * for the router port in the egress pipeline of ls2 on
1299 * hostB. This zone does not know about the connection,
1300 * as the icmp request went through the logical router
1301 * on hostA, not hostB. This would only work with
1302 * distributed conntrack state across all chassis. */
1303 struct ds match_in = DS_EMPTY_INITIALIZER;
1304 struct ds match_out = DS_EMPTY_INITIALIZER;
1305
48fcdb47
WL
1306 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1307 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
501f95e1
JP
1308 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
1309 ds_cstr(&match_in), "next;");
1310 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
1311 ds_cstr(&match_out), "next;");
48fcdb47
WL
1312
1313 ds_destroy(&match_in);
1314 ds_destroy(&match_out);
1315 }
1316 }
1317
78aab811
JP
1318 /* Ingress and Egress Pre-ACL Table (Priority 100).
1319 *
1320 * Regardless of whether the ACL is "from-lport" or "to-lport",
1321 * we need rules in both the ingress and egress table, because
1322 * the return traffic needs to be followed. */
880fcd14
BP
1323 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
1324 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
78aab811
JP
1325
1326 /* Ingress and Egress ACL Table (Priority 1).
1327 *
1328 * By default, traffic is allowed. This is partially handled by
1329 * the Priority 0 ACL flows added earlier, but we also need to
1330 * commit IP flows. This is because, while the initiater's
1331 * direction may not have any stateful rules, the server's may
1332 * and then its return traffic would not have an associated
1333 * conntrack entry and would return "+invalid". */
880fcd14 1334 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
78aab811 1335 "ct_commit; next;");
880fcd14 1336 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
78aab811
JP
1337 "ct_commit; next;");
1338
1339 /* Ingress and Egress ACL Table (Priority 65535).
1340 *
1341 * Always drop traffic that's in an invalid state. This is
1342 * enforced at a higher priority than ACLs can be defined. */
880fcd14 1343 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
78aab811 1344 "ct.inv", "drop;");
880fcd14 1345 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
78aab811
JP
1346 "ct.inv", "drop;");
1347
1348 /* Ingress and Egress ACL Table (Priority 65535).
1349 *
1350 * Always allow traffic that is established to a committed
1351 * conntrack entry. This is enforced at a higher priority than
1352 * ACLs can be defined. */
880fcd14 1353 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
78aab811
JP
1354 "ct.est && !ct.rel && !ct.new && !ct.inv",
1355 "next;");
880fcd14 1356 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
78aab811
JP
1357 "ct.est && !ct.rel && !ct.new && !ct.inv",
1358 "next;");
1359
1360 /* Ingress and Egress ACL Table (Priority 65535).
1361 *
1362 * Always allow traffic that is related to an existing conntrack
1363 * entry. This is enforced at a higher priority than ACLs can
1364 * be defined.
1365 *
1366 * NOTE: This does not support related data sessions (eg,
1367 * a dynamically negotiated FTP data channel), but will allow
1368 * related traffic such as an ICMP Port Unreachable through
1369 * that's generated from a non-listening UDP port. */
880fcd14 1370 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
78aab811
JP
1371 "!ct.est && ct.rel && !ct.new && !ct.inv",
1372 "next;");
880fcd14 1373 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
78aab811
JP
1374 "!ct.est && ct.rel && !ct.new && !ct.inv",
1375 "next;");
1376 }
1377
1378 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
1379 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1380 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 1381 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 1382 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811
JP
1383
1384 if (!strcmp(acl->action, "allow")) {
1385 /* If there are any stateful flows, we must even commit "allow"
1386 * actions. This is because, while the initiater's
1387 * direction may not have any stateful rules, the server's
1388 * may and then its return traffic would not have an
1389 * associated conntrack entry and would return "+invalid". */
1390 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
6bb4a18e
JP
1391 ovn_lflow_add(lflows, od, stage,
1392 acl->priority + OVN_ACL_PRI_OFFSET,
78aab811
JP
1393 acl->match, actions);
1394 } else if (!strcmp(acl->action, "allow-related")) {
1395 struct ds match = DS_EMPTY_INITIALIZER;
1396
1397 /* Commit the connection tracking entry, which allows all
1398 * other traffic related to this entry to flow due to the
1399 * 65535 priority flow defined earlier. */
1400 ds_put_format(&match, "ct.new && (%s)", acl->match);
6bb4a18e
JP
1401 ovn_lflow_add(lflows, od, stage,
1402 acl->priority + OVN_ACL_PRI_OFFSET,
78aab811
JP
1403 ds_cstr(&match), "ct_commit; next;");
1404
1405 ds_destroy(&match);
1406 } else if (!strcmp(acl->action, "drop")) {
6bb4a18e
JP
1407 ovn_lflow_add(lflows, od, stage,
1408 acl->priority + OVN_ACL_PRI_OFFSET,
78aab811
JP
1409 acl->match, "drop;");
1410 } else if (!strcmp(acl->action, "reject")) {
1411 /* xxx Need to support "reject". */
1412 VLOG_INFO("reject is not a supported action");
6bb4a18e
JP
1413 ovn_lflow_add(lflows, od, stage,
1414 acl->priority + OVN_ACL_PRI_OFFSET,
78aab811
JP
1415 acl->match, "drop;");
1416 }
1417 }
1418}
1419
bd39395f 1420static void
9975d7be
BP
1421build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1422 struct hmap *lflows, struct hmap *mcgroups)
bd39395f 1423{
5cff6b99
BP
1424 /* This flow table structure is documented in ovn-northd(8), so please
1425 * update ovn-northd.8.xml if you change anything. */
1426
9975d7be 1427 /* Build pre-ACL and ACL tables for both ingress and egress.
685f4dfe 1428 * Ingress tables 3 and 4. Egress tables 0 and 1. */
5868eb24
BP
1429 struct ovn_datapath *od;
1430 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1431 if (!od->nbs) {
1432 continue;
1433 }
1434
48fcdb47 1435 build_acls(od, lflows, ports);
9975d7be
BP
1436 }
1437
1438 /* Logical switch ingress table 0: Admission control framework (priority
1439 * 100). */
1440 HMAP_FOR_EACH (od, key_node, datapaths) {
1441 if (!od->nbs) {
1442 continue;
1443 }
1444
bd39395f 1445 /* Logical VLANs not supported. */
685f4dfe 1446 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
091e3af9 1447 "drop;");
bd39395f
BP
1448
1449 /* Broadcast/multicast source address is invalid. */
685f4dfe 1450 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
091e3af9 1451 "drop;");
bd39395f 1452
35060cdc
BP
1453 /* Port security flows have priority 50 (see below) and will continue
1454 * to the next table if packet source is acceptable. */
bd39395f
BP
1455 }
1456
685f4dfe
NS
1457 /* Logical switch ingress table 0: Ingress port security - L2
1458 * (priority 50).
1459 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
1460 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
1461 */
5868eb24
BP
1462 struct ovn_port *op;
1463 HMAP_FOR_EACH (op, key_node, ports) {
9975d7be
BP
1464 if (!op->nbs) {
1465 continue;
1466 }
1467
1468 if (!lport_is_enabled(op->nbs)) {
96af668a
BP
1469 /* Drop packets from disabled logical ports (since logical flow
1470 * tables are default-drop). */
1471 continue;
1472 }
1473
5868eb24 1474 struct ds match = DS_EMPTY_INITIALIZER;
9975d7be 1475 ds_put_format(&match, "inport == %s", op->json_key);
685f4dfe
NS
1476 build_port_security_l2(
1477 "eth.src", op->nbs->port_security, op->nbs->n_port_security,
1478 &match);
1479 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
96af668a 1480 ds_cstr(&match), "next;");
5868eb24 1481 ds_destroy(&match);
685f4dfe
NS
1482
1483 if (op->nbs->n_port_security) {
1484 build_port_security_ip(P_IN, op, lflows);
1485 build_port_security_nd(op, lflows);
1486 }
1487 }
1488
1489 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
1490 * (priority 0)*/
1491 HMAP_FOR_EACH (od, key_node, datapaths) {
1492 if (!od->nbs) {
1493 continue;
1494 }
1495
1496 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
1497 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
5868eb24 1498 }
445a266a 1499
fa128126
HZ
1500 /* Ingress table 3: ARP responder, skip requests coming from localnet ports.
1501 * (priority 100). */
1502 HMAP_FOR_EACH (op, key_node, ports) {
1503 if (!op->nbs) {
1504 continue;
1505 }
1506
1507 if (!strcmp(op->nbs->type, "localnet")) {
1508 char *match = xasprintf("inport == %s", op->json_key);
1509 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 100,
1510 match, "next;");
1511 free(match);
1512 }
1513 }
1514
685f4dfe 1515 /* Ingress table 5: ARP responder, reply for known IPs.
fa128126 1516 * (priority 50). */
57d143eb
HZ
1517 HMAP_FOR_EACH (op, key_node, ports) {
1518 if (!op->nbs) {
1519 continue;
1520 }
1521
4c7bf534
NS
1522 /*
1523 * Add ARP reply flows if either the
1524 * - port is up or
1525 * - port type is router
1526 */
1527 if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
1528 continue;
1529 }
1530
57d143eb 1531 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
7dc88496
NS
1532 struct lport_addresses laddrs;
1533 if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
1534 false)) {
1535 continue;
1536 }
1537 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
57d143eb 1538 char *match = xasprintf(
7dc88496
NS
1539 "arp.tpa == "IP_FMT" && arp.op == 1",
1540 IP_ARGS(laddrs.ipv4_addrs[j].addr));
57d143eb
HZ
1541 char *actions = xasprintf(
1542 "eth.dst = eth.src; "
1543 "eth.src = "ETH_ADDR_FMT"; "
1544 "arp.op = 2; /* ARP reply */ "
1545 "arp.tha = arp.sha; "
1546 "arp.sha = "ETH_ADDR_FMT"; "
1547 "arp.tpa = arp.spa; "
1548 "arp.spa = "IP_FMT"; "
1549 "outport = inport; "
1550 "inport = \"\"; /* Allow sending out inport. */ "
1551 "output;",
7dc88496
NS
1552 ETH_ADDR_ARGS(laddrs.ea),
1553 ETH_ADDR_ARGS(laddrs.ea),
1554 IP_ARGS(laddrs.ipv4_addrs[j].addr));
fa128126 1555 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 50,
57d143eb
HZ
1556 match, actions);
1557 free(match);
1558 free(actions);
1559 }
7dc88496
NS
1560
1561 free(laddrs.ipv4_addrs);
57d143eb
HZ
1562 }
1563 }
1564
685f4dfe 1565 /* Ingress table 5: ARP responder, by default goto next.
fa128126
HZ
1566 * (priority 0)*/
1567 HMAP_FOR_EACH (od, key_node, datapaths) {
1568 if (!od->nbs) {
1569 continue;
1570 }
1571
1572 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_RSP, 0, "1", "next;");
1573 }
1574
685f4dfe 1575 /* Ingress table 6: Destination lookup, broadcast and multicast handling
5868eb24
BP
1576 * (priority 100). */
1577 HMAP_FOR_EACH (op, key_node, ports) {
9975d7be
BP
1578 if (!op->nbs) {
1579 continue;
1580 }
1581
1582 if (lport_is_enabled(op->nbs)) {
1583 ovn_multicast_add(mcgroups, &mc_flood, op);
445a266a 1584 }
5868eb24
BP
1585 }
1586 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1587 if (!od->nbs) {
1588 continue;
1589 }
1590
1591 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
5868eb24 1592 "outport = \""MC_FLOOD"\"; output;");
bd39395f 1593 }
bd39395f 1594
685f4dfe 1595 /* Ingress table 6: Destination lookup, unicast handling (priority 50), */
5868eb24 1596 HMAP_FOR_EACH (op, key_node, ports) {
9975d7be
BP
1597 if (!op->nbs) {
1598 continue;
1599 }
1600
1601 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
74ff3298 1602 struct eth_addr mac;
5868eb24 1603
9975d7be 1604 if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
5868eb24
BP
1605 struct ds match, actions;
1606
1607 ds_init(&match);
9975d7be
BP
1608 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1609 ETH_ADDR_ARGS(mac));
5868eb24
BP
1610
1611 ds_init(&actions);
9975d7be
BP
1612 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1613 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
5868eb24
BP
1614 ds_cstr(&match), ds_cstr(&actions));
1615 ds_destroy(&actions);
1616 ds_destroy(&match);
9975d7be
BP
1617 } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1618 if (lport_is_enabled(op->nbs)) {
1619 ovn_multicast_add(mcgroups, &mc_unknown, op);
96af668a
BP
1620 op->od->has_unknown = true;
1621 }
5868eb24
BP
1622 } else {
1623 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
445a266a 1624
2fa326a3
BP
1625 VLOG_INFO_RL(&rl,
1626 "%s: invalid syntax '%s' in addresses column",
9975d7be 1627 op->nbs->name, op->nbs->addresses[i]);
445a266a
BP
1628 }
1629 }
bd39395f
BP
1630 }
1631
685f4dfe 1632 /* Ingress table 6: Destination lookup for unknown MACs (priority 0). */
5868eb24 1633 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1634 if (!od->nbs) {
1635 continue;
1636 }
1637
5868eb24 1638 if (od->has_unknown) {
9975d7be 1639 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
5868eb24 1640 "outport = \""MC_UNKNOWN"\"; output;");
445a266a 1641 }
bd39395f
BP
1642 }
1643
685f4dfe
NS
1644 /* Egress table 2: Egress port security - IP (priority 0)
1645      * Egress table 3: Egress port security - L2 - multicast/broadcast
5868eb24
BP
1646      * (priority 100). */
1647 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1648 if (!od->nbs) {
1649 continue;
1650 }
1651
685f4dfe
NS
1652 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
1653 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
091e3af9 1654 "output;");
48f42f3a
RB
1655 }
1656
685f4dfe
NS
1657 /* Egress table 2: Egress port security - IP (priorities 90 and 80)
1658 * if port security enabled.
1659 *
1660 * Egress table 3: Egress port security - L2 (priorities 50 and 150).
d770a830
BP
1661 *
1662 * Priority 50 rules implement port security for enabled logical port.
1663 *
1664 * Priority 150 rules drop packets to disabled logical ports, so that they
1665 * don't even receive multicast or broadcast packets. */
5868eb24 1666 HMAP_FOR_EACH (op, key_node, ports) {
9975d7be
BP
1667 if (!op->nbs) {
1668 continue;
1669 }
1670
1671 struct ds match = DS_EMPTY_INITIALIZER;
1672 ds_put_format(&match, "outport == %s", op->json_key);
1673 if (lport_is_enabled(op->nbs)) {
685f4dfe
NS
1674 build_port_security_l2("eth.dst", op->nbs->port_security,
1675 op->nbs->n_port_security, &match);
1676 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
d770a830
BP
1677 ds_cstr(&match), "output;");
1678 } else {
685f4dfe 1679 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
d770a830
BP
1680 ds_cstr(&match), "drop;");
1681 }
eb00399e 1682
5868eb24 1683 ds_destroy(&match);
685f4dfe
NS
1684
1685 if (op->nbs->n_port_security) {
1686 build_port_security_ip(P_OUT, op, lflows);
1687 }
eb00399e 1688 }
9975d7be 1689}
eb00399e 1690
9975d7be
BP
1691static bool
1692lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1693{
1694 return !lrport->enabled || *lrport->enabled;
1695}
1696
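/* Adds to 'lflows' a logical flow for 'op' that routes packets destined to
 * 'network'/'mask': the flow decrements the IP TTL, stores the next hop in
 * reg0 ('gateway' if nonzero, otherwise the packet's own ip4.dst), stores the
 * port's own IP address in reg1, rewrites eth.src and outport, and advances
 * to the next table for ARP resolution.  The flow priority is the prefix
 * length, which yields longest-prefix-match behavior. */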
1697static void
0bac7164 1698add_route(struct hmap *lflows, const struct ovn_port *op,
9975d7be
BP
1699 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1700{
1701 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1702 IP_ARGS(network), IP_ARGS(mask));
1703
1704 struct ds actions = DS_EMPTY_INITIALIZER;
47f3b59b 1705 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
9975d7be
BP
1706 if (gateway) {
1707 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1708 } else {
1709 ds_put_cstr(&actions, "ip4.dst");
1710 }
0bac7164
BP
1711 ds_put_format(&actions,
1712 "; "
1713 "reg1 = "IP_FMT"; "
1714 "eth.src = "ETH_ADDR_FMT"; "
1715 "outport = %s; "
1716 "next;",
1717 IP_ARGS(op->ip), ETH_ADDR_ARGS(op->mac), op->json_key);
9975d7be
BP
1718
1719 /* The priority here is calculated to implement longest-prefix-match
1720 * routing. */
0bac7164 1721 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING,
9975d7be
BP
1722 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1723 ds_destroy(&actions);
1724 free(match);
1725}
1726
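/* A minimal sketch (not part of the original file) showing how add_route()'s
 * priority implements longest-prefix match: the priority is simply the prefix
 * length, so when both a /24 route and the default route match a packet, the
 * /24 route (priority 24) wins over the default (priority 0).  count_1bits()
 * and ntohl() are the same helpers add_route() uses. */
#if 0
static int
example_route_priority(ovs_be32 mask)
{
    /* 255.255.255.0 -> 24, 255.255.0.0 -> 16, 0.0.0.0 -> 0. */
    return count_1bits(ntohl(mask));
}
#endif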
28dc3fe9
SR
1727static void
1728build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
1729 struct hmap *ports,
1730 const struct nbrec_logical_router_static_route *route)
1731{
1732 ovs_be32 prefix, next_hop, mask;
1733
1734     /* Verify that the next hop is an IP address with a 32-bit mask. */
1735 char *error = ip_parse_masked(route->nexthop, &next_hop, &mask);
1736 if (error || mask != OVS_BE32_MAX) {
1737 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1738 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
1739 free(error);
1740 return;
1741 }
1742
1743     /* Verify that the IP prefix is a valid CIDR address. */
1744 error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
1745 if (error || !ip_is_cidr(mask)) {
1746 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1747 VLOG_WARN_RL(&rl, "bad 'network' in static routes %s",
1748 route->ip_prefix);
1749 free(error);
1750 return;
1751 }
1752
1753 /* Find the outgoing port. */
1754 struct ovn_port *out_port = NULL;
1755 if (route->output_port) {
1756 out_port = ovn_port_find(ports, route->output_port);
1757 if (!out_port) {
1758 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1759 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
1760 route->output_port, route->ip_prefix);
1761 return;
1762 }
1763 } else {
1764 /* output_port is not specified, find the
1765 * router port matching the next hop. */
1766 int i;
1767 for (i = 0; i < od->nbr->n_ports; i++) {
1768 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
1769 out_port = ovn_port_find(ports, lrp->name);
1770 if (!out_port) {
1771 /* This should not happen. */
1772 continue;
1773 }
1774
1775 if (out_port->network
1776 && !((out_port->network ^ next_hop) & out_port->mask)) {
1777 /* There should be only 1 interface that matches the next hop.
1778                  * Otherwise, it's a configuration error, because the subnets
1779                  * of the router's interfaces should NOT overlap. */
1780 break;
1781 }
1782 }
1783 if (i == od->nbr->n_ports) {
1784             /* There is no matching out port. */
1785 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1786 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
1787 route->ip_prefix, route->nexthop);
1788 return;
1789 }
1790 }
1791
1792 add_route(lflows, out_port, prefix, mask, next_hop);
1793}
1794
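/* A minimal sketch (not part of the original file) of the subnet test used
 * above when no output_port is given: "(network ^ next_hop) & mask" is zero
 * exactly when 'next_hop' falls inside 'network'/'mask', e.g. 10.0.0.5 is
 * inside 10.0.0.0/24 while 10.0.1.5 is not.  The helper name is
 * hypothetical. */
#if 0
static bool
example_ip_in_subnet(ovs_be32 network, ovs_be32 mask, ovs_be32 addr)
{
    return !((network ^ addr) & mask);
}
#endif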
9975d7be
BP
1795static void
1796build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1797 struct hmap *lflows)
1798{
1799 /* This flow table structure is documented in ovn-northd(8), so please
1800 * update ovn-northd.8.xml if you change anything. */
1801
9975d7be
BP
1802 /* Logical router ingress table 0: Admission control framework. */
1803 struct ovn_datapath *od;
1804 HMAP_FOR_EACH (od, key_node, datapaths) {
1805 if (!od->nbr) {
1806 continue;
1807 }
1808
1809 /* Logical VLANs not supported.
1810 * Broadcast/multicast source address is invalid. */
1811 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1812 "vlan.present || eth.src[40]", "drop;");
1813 }
1814
1815 /* Logical router ingress table 0: match (priority 50). */
1816 struct ovn_port *op;
1817 HMAP_FOR_EACH (op, key_node, ports) {
1818 if (!op->nbr) {
1819 continue;
1820 }
1821
1822 if (!lrport_is_enabled(op->nbr)) {
1823 /* Drop packets from disabled logical ports (since logical flow
1824 * tables are default-drop). */
1825 continue;
1826 }
1827
1828 char *match = xasprintf(
1829 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1830 ETH_ADDR_ARGS(op->mac), op->json_key);
1831 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1832 match, "next;");
e2229be9 1833 free(match);
9975d7be
BP
1834 }
1835
1836 /* Logical router ingress table 1: IP Input. */
78aab811 1837 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1838 if (!od->nbr) {
1839 continue;
1840 }
1841
1842 /* L3 admission control: drop multicast and broadcast source, localhost
1843 * source or destination, and zero network source or destination
1844 * (priority 100). */
1845 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1846 "ip4.mcast || "
1847 "ip4.src == 255.255.255.255 || "
1848 "ip4.src == 127.0.0.0/8 || "
1849 "ip4.dst == 127.0.0.0/8 || "
1850 "ip4.src == 0.0.0.0/8 || "
1851 "ip4.dst == 0.0.0.0/8",
1852 "drop;");
1853
0bac7164
BP
1854 /* ARP reply handling. Use ARP replies to populate the logical
1855 * router's ARP table. */
1856 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
1857 "put_arp(inport, arp.spa, arp.sha);");
1858
9975d7be
BP
1859 /* Drop Ethernet local broadcast. By definition this traffic should
1860          * not be forwarded. */
1861 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1862 "eth.bcast", "drop;");
1863
1864 /* Drop IP multicast. */
1865 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1866 "ip4.mcast", "drop;");
1867
1868 /* TTL discard.
1869 *
1870 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1871 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1872 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1873 free(match);
1874
1875 /* Pass other traffic not already handled to the next table for
1876 * routing. */
1877 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
1878 }
1879
9975d7be
BP
1880 HMAP_FOR_EACH (op, key_node, ports) {
1881 if (!op->nbr) {
1882 continue;
1883 }
1884
1885 /* L3 admission control: drop packets that originate from an IP address
1886 * owned by the router or a broadcast address known to the router
1887 * (priority 100). */
1888 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1889 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1890 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1891 match, "drop;");
1892 free(match);
1893
dd7652e6
JP
1894 /* ICMP echo reply. These flows reply to ICMP echo requests
1895 * received for the router's IP address. */
1896 match = xasprintf(
1897 "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1898 "icmp4.type == 8 && icmp4.code == 0",
1899 op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast));
1900 char *actions = xasprintf(
1901 "ip4.dst = ip4.src; "
1902 "ip4.src = "IP_FMT"; "
1903 "ip.ttl = 255; "
1904 "icmp4.type = 0; "
1905 "inport = \"\"; /* Allow sending out inport. */ "
1906 "next; ",
1907 IP_ARGS(op->ip));
1908 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1909 match, actions);
1910 free(match);
1911 free(actions);
1912
9975d7be
BP
1913 /* ARP reply. These flows reply to ARP requests for the router's own
1914 * IP address. */
1915 match = xasprintf(
1916 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1917 op->json_key, IP_ARGS(op->ip));
dd7652e6 1918 actions = xasprintf(
9975d7be
BP
1919 "eth.dst = eth.src; "
1920 "eth.src = "ETH_ADDR_FMT"; "
1921 "arp.op = 2; /* ARP reply */ "
1922 "arp.tha = arp.sha; "
1923 "arp.sha = "ETH_ADDR_FMT"; "
1924 "arp.tpa = arp.spa; "
1925 "arp.spa = "IP_FMT"; "
1926 "outport = %s; "
1927 "inport = \"\"; /* Allow sending out inport. */ "
1928 "output;",
1929 ETH_ADDR_ARGS(op->mac),
1930 ETH_ADDR_ARGS(op->mac),
1931 IP_ARGS(op->ip),
1932 op->json_key);
1933 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1934 match, actions);
abcec848
JP
1935 free(match);
1936 free(actions);
9975d7be
BP
1937
1938 /* Drop IP traffic to this router. */
1939 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1940 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1941 match, "drop;");
1942 free(match);
1943 }
1944
1945 /* Logical router ingress table 2: IP Routing.
1946 *
1947 * A packet that arrives at this table is an IP packet that should be
0bac7164
BP
1948 * routed to the address in ip4.dst. This table sets outport to the correct
1949 * output port, eth.src to the output port's MAC address, and reg0 to the
1950      * next-hop IP address (leaving ip4.dst, the packet's final destination,
1951 * unchanged), and advances to the next table for ARP resolution. */
9975d7be
BP
1952 HMAP_FOR_EACH (op, key_node, ports) {
1953 if (!op->nbr) {
1954 continue;
1955 }
1956
0bac7164 1957 add_route(lflows, op, op->network, op->mask, 0);
9975d7be
BP
1958 }
1959 HMAP_FOR_EACH (od, key_node, datapaths) {
1960 if (!od->nbr) {
1961 continue;
1962 }
1963
28dc3fe9
SR
1964 /* Convert the static routes to flows. */
1965 for (int i = 0; i < od->nbr->n_static_routes; i++) {
1966 const struct nbrec_logical_router_static_route *route;
1967
1968 route = od->nbr->static_routes[i];
1969 build_static_route_flow(lflows, od, ports, route);
1970 }
1971
0bac7164
BP
1972 if (od->gateway && od->gateway_port) {
1973 add_route(lflows, od->gateway_port, 0, 0, od->gateway);
9975d7be
BP
1974 }
1975 }
1976 /* XXX destination unreachable */
1977
1978     /* Logical router ingress table 3: ARP Resolution.
1979 *
1980 * Any packet that reaches this table is an IP packet whose next-hop IP
1981 * address is in reg0. (ip4.dst is the final destination.) This table
1982 * resolves the IP address in reg0 into an output port in outport and an
1983 * Ethernet address in eth.dst. */
1984 HMAP_FOR_EACH (op, key_node, ports) {
1985 if (op->nbr) {
509afdc3
GS
1986             /* This is a logical router port. If the next-hop IP address in
1987              * 'reg0' matches the IP address of this router port, then the
1988              * packet is intended to eventually be sent to this logical port.
1989              * Set the destination MAC address using this port's MAC address.
1990              *
1991              * The packet is still in the peer's logical pipeline, so the
1992              * match should be on the peer's outport. */
1993 if (op->nbr->peer) {
1994 struct ovn_port *peer = ovn_port_find(ports, op->nbr->peer);
1995 if (!peer) {
1996 continue;
1997 }
1998
1999 if (!peer->ip || !op->ip) {
2000 continue;
2001 }
2002 char *match = xasprintf("outport == %s && reg0 == "IP_FMT,
2003 peer->json_key, IP_ARGS(op->ip));
2004 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
2005 "next;", ETH_ADDR_ARGS(op->mac));
2006 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2007 100, match, actions);
2008 free(actions);
2009 free(match);
2010 }
86e98048 2011 } else if (op->od->n_router_ports) {
9975d7be 2012 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
7dc88496
NS
2013 struct lport_addresses laddrs;
2014 if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
2015 false)) {
2016 continue;
2017 }
9975d7be 2018
7dc88496
NS
2019 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
2020 ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
86e98048
BP
2021 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2022 /* Get the Logical_Router_Port that the Logical_Port is
2023 * connected to, as 'peer'. */
2024 const char *peer_name = smap_get(
2025 &op->od->router_ports[j]->nbs->options,
2026 "router-port");
2027 if (!peer_name) {
2028 continue;
2029 }
2030
2031 struct ovn_port *peer
2032 = ovn_port_find(ports, peer_name);
2033 if (!peer || !peer->nbr) {
2034 continue;
2035 }
2036
2037 /* Make sure that 'ip' is in 'peer''s network. */
2038 if ((ip ^ peer->network) & peer->mask) {
2039 continue;
2040 }
2041
0bac7164
BP
2042 char *match = xasprintf(
2043 "outport == %s && reg0 == "IP_FMT,
2044 peer->json_key, IP_ARGS(ip));
2045 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
2046 "next;",
2047 ETH_ADDR_ARGS(laddrs.ea));
86e98048 2048 ovn_lflow_add(lflows, peer->od,
0bac7164
BP
2049 S_ROUTER_IN_ARP_RESOLVE,
2050 100, match, actions);
86e98048
BP
2051 free(actions);
2052 free(match);
2053 break;
2054 }
9975d7be 2055 }
7dc88496
NS
2056
2057 free(laddrs.ipv4_addrs);
9975d7be
BP
2058 }
2059 }
2060 }
0bac7164
BP
2061 HMAP_FOR_EACH (od, key_node, datapaths) {
2062 if (!od->nbr) {
2063 continue;
2064 }
2065
2066 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
2067 "get_arp(outport, reg0); next;");
2068 }
2069
2070     /* Logical router ingress table 4: ARP request.
2071 *
2072 * In the common case where the Ethernet destination has been resolved,
2073      * this table outputs the packet (priority 0). Otherwise, it composes
2074      * and sends an ARP request (priority 100). */
2075 HMAP_FOR_EACH (od, key_node, datapaths) {
2076 if (!od->nbr) {
2077 continue;
2078 }
2079
2080 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
2081 "eth.dst == 00:00:00:00:00:00",
2082 "arp { "
2083 "eth.dst = ff:ff:ff:ff:ff:ff; "
2084 "arp.spa = reg1; "
2085 "arp.op = 1; " /* ARP request */
2086 "output; "
2087 "};");
2088 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
2089 }
9975d7be
BP
2090
2091 /* Logical router egress table 0: Delivery (priority 100).
2092 *
2093 * Priority 100 rules deliver packets to enabled logical ports. */
2094 HMAP_FOR_EACH (op, key_node, ports) {
2095 if (!op->nbr) {
2096 continue;
2097 }
2098
2099 if (!lrport_is_enabled(op->nbr)) {
2100 /* Drop packets to disabled logical ports (since logical flow
2101 * tables are default-drop). */
2102 continue;
2103 }
2104
2105 char *match = xasprintf("outport == %s", op->json_key);
2106 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
2107 match, "output;");
2108 free(match);
2109 }
2110}
2111
2112/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2113 * constructing their contents based on the OVN_NB database. */
2114static void
2115build_lflows(struct northd_context *ctx, struct hmap *datapaths,
2116 struct hmap *ports)
2117{
2118 struct hmap lflows = HMAP_INITIALIZER(&lflows);
2119 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
2120
2121 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
2122 build_lrouter_flows(datapaths, ports, &lflows);
2123
5868eb24
BP
2124 /* Push changes to the Logical_Flow table to database. */
2125 const struct sbrec_logical_flow *sbflow, *next_sbflow;
2126 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
2127 struct ovn_datapath *od
2128 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
2129 if (!od) {
2130 sbrec_logical_flow_delete(sbflow);
2131 continue;
eb00399e 2132 }
eb00399e 2133
9975d7be 2134 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
880fcd14
BP
2135 enum ovn_pipeline pipeline
2136 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
5868eb24 2137 struct ovn_lflow *lflow = ovn_lflow_find(
880fcd14
BP
2138 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
2139 sbflow->priority, sbflow->match, sbflow->actions);
5868eb24
BP
2140 if (lflow) {
2141 ovn_lflow_destroy(&lflows, lflow);
2142 } else {
2143 sbrec_logical_flow_delete(sbflow);
4edcdcf4
RB
2144 }
2145 }
5868eb24
BP
2146 struct ovn_lflow *lflow, *next_lflow;
2147 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
880fcd14
BP
2148 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
2149 uint8_t table = ovn_stage_get_table(lflow->stage);
2150
5868eb24
BP
2151 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
2152 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
9975d7be
BP
2153 sbrec_logical_flow_set_pipeline(
2154 sbflow, pipeline == P_IN ? "ingress" : "egress");
880fcd14 2155 sbrec_logical_flow_set_table_id(sbflow, table);
5868eb24
BP
2156 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
2157 sbrec_logical_flow_set_match(sbflow, lflow->match);
2158 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
091e3af9 2159
880fcd14
BP
2160 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
2161 ovn_stage_to_str(lflow->stage));
aaf881c6 2162 sbrec_logical_flow_set_external_ids(sbflow, &ids);
091e3af9 2163
5868eb24 2164 ovn_lflow_destroy(&lflows, lflow);
eb00399e 2165 }
5868eb24
BP
2166 hmap_destroy(&lflows);
2167
2168 /* Push changes to the Multicast_Group table to database. */
2169 const struct sbrec_multicast_group *sbmc, *next_sbmc;
2170 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
2171 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
2172 sbmc->datapath);
2173 if (!od) {
2174 sbrec_multicast_group_delete(sbmc);
2175 continue;
2176 }
eb00399e 2177
5868eb24
BP
2178 struct multicast_group group = { .name = sbmc->name,
2179 .key = sbmc->tunnel_key };
2180 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
2181 if (mc) {
2182 ovn_multicast_update_sbrec(mc, sbmc);
2183 ovn_multicast_destroy(&mcgroups, mc);
2184 } else {
2185 sbrec_multicast_group_delete(sbmc);
2186 }
2187 }
2188 struct ovn_multicast *mc, *next_mc;
2189 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
2190 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
2191 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
2192 sbrec_multicast_group_set_name(sbmc, mc->group->name);
2193 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
2194 ovn_multicast_update_sbrec(mc, sbmc);
2195 ovn_multicast_destroy(&mcgroups, mc);
4edcdcf4 2196 }
5868eb24 2197 hmap_destroy(&mcgroups);
4edcdcf4 2198}
5868eb24 2199\f
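/* Computes the complete southbound contents derived from the northbound
 * database: builds the datapath, port, and logical flow sets, pushes them to
 * the OVN_SB database, and then frees the intermediate structures.  A no-op
 * unless a southbound transaction is open. */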
4edcdcf4 2200static void
331e7aef 2201ovnnb_db_run(struct northd_context *ctx)
4edcdcf4 2202{
331e7aef
NS
2203 if (!ctx->ovnsb_txn) {
2204 return;
2205 }
5868eb24
BP
2206 struct hmap datapaths, ports;
2207 build_datapaths(ctx, &datapaths);
2208 build_ports(ctx, &datapaths, &ports);
2209 build_lflows(ctx, &datapaths, &ports);
2210
2211 struct ovn_datapath *dp, *next_dp;
2212 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
2213 ovn_datapath_destroy(&datapaths, dp);
2214 }
2215 hmap_destroy(&datapaths);
2216
2217 struct ovn_port *port, *next_port;
2218 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
2219 ovn_port_destroy(&ports, port);
2220 }
2221 hmap_destroy(&ports);
ac0630a2
RB
2222}
2223
f93818dd
RB
2224/*
2225 * The only change we get notified about is if the 'chassis' column of the
dcda6e0d
BP
2226 * 'Port_Binding' table changes. When this column is not empty, it means we
2227 * need to set the corresponding logical port as 'up' in the northbound DB.
f93818dd 2228 */
ac0630a2 2229static void
331e7aef 2230ovnsb_db_run(struct northd_context *ctx)
ac0630a2 2231{
331e7aef
NS
2232 if (!ctx->ovnnb_txn) {
2233 return;
2234 }
fc3113bc 2235 struct hmap lports_hmap;
5868eb24
BP
2236 const struct sbrec_port_binding *sb;
2237 const struct nbrec_logical_port *nb;
fc3113bc
RB
2238
2239 struct lport_hash_node {
2240 struct hmap_node node;
5868eb24 2241 const struct nbrec_logical_port *nb;
4ec3d7c7 2242 } *hash_node;
f93818dd 2243
fc3113bc 2244 hmap_init(&lports_hmap);
f93818dd 2245
5868eb24 2246 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
fc3113bc 2247 hash_node = xzalloc(sizeof *hash_node);
5868eb24
BP
2248 hash_node->nb = nb;
2249 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
fc3113bc
RB
2250 }
2251
5868eb24
BP
2252 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
2253 nb = NULL;
fc3113bc 2254 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
2255 hash_string(sb->logical_port, 0),
2256 &lports_hmap) {
2257 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
2258 nb = hash_node->nb;
fc3113bc
RB
2259 break;
2260 }
f93818dd
RB
2261 }
2262
5868eb24 2263 if (!nb) {
dcda6e0d 2264 /* The logical port doesn't exist for this port binding. This can
2e2762d4 2265 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 2266 * around to pruning the Port_Binding yet. */
f93818dd
RB
2267 continue;
2268 }
2269
5868eb24 2270 if (sb->chassis && (!nb->up || !*nb->up)) {
f93818dd 2271 bool up = true;
5868eb24
BP
2272 nbrec_logical_port_set_up(nb, &up, 1);
2273 } else if (!sb->chassis && (!nb->up || *nb->up)) {
f93818dd 2274 bool up = false;
5868eb24 2275 nbrec_logical_port_set_up(nb, &up, 1);
f93818dd
RB
2276 }
2277 }
fc3113bc 2278
4ec3d7c7 2279 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
2280 free(hash_node);
2281 }
2282 hmap_destroy(&lports_hmap);
ac0630a2
RB
2283}
2284\f
45f98d4c 2285
60bdd011 2286static char *default_nb_db_;
45f98d4c 2287
ac0630a2 2288static const char *
60bdd011 2289default_nb_db(void)
ac0630a2 2290{
60bdd011
RM
2291 if (!default_nb_db_) {
2292 default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
ac0630a2 2293 }
60bdd011
RM
2294 return default_nb_db_;
2295}
2296
2297static char *default_sb_db_;
2298
2299static const char *
2300default_sb_db(void)
2301{
2302 if (!default_sb_db_) {
2303 default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
2304 }
2305 return default_sb_db_;
ac0630a2
RB
2306}
2307
2308static void
2309parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
2310{
2311 enum {
67d9b930 2312 DAEMON_OPTION_ENUMS,
ac0630a2
RB
2313 VLOG_OPTION_ENUMS,
2314 };
2315 static const struct option long_options[] = {
ec78987f 2316 {"ovnsb-db", required_argument, NULL, 'd'},
ac0630a2
RB
2317 {"ovnnb-db", required_argument, NULL, 'D'},
2318 {"help", no_argument, NULL, 'h'},
2319 {"options", no_argument, NULL, 'o'},
2320 {"version", no_argument, NULL, 'V'},
67d9b930 2321 DAEMON_LONG_OPTIONS,
ac0630a2
RB
2322 VLOG_LONG_OPTIONS,
2323 STREAM_SSL_LONG_OPTIONS,
2324 {NULL, 0, NULL, 0},
2325 };
2326 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
2327
2328 for (;;) {
2329 int c;
2330
2331 c = getopt_long(argc, argv, short_options, long_options, NULL);
2332 if (c == -1) {
2333 break;
2334 }
2335
2336 switch (c) {
67d9b930 2337 DAEMON_OPTION_HANDLERS;
ac0630a2
RB
2338 VLOG_OPTION_HANDLERS;
2339 STREAM_SSL_OPTION_HANDLERS;
2340
2341 case 'd':
ec78987f 2342 ovnsb_db = optarg;
ac0630a2
RB
2343 break;
2344
2345 case 'D':
2346 ovnnb_db = optarg;
2347 break;
2348
2349 case 'h':
2350 usage();
2351 exit(EXIT_SUCCESS);
2352
2353 case 'o':
2354 ovs_cmdl_print_options(long_options);
2355 exit(EXIT_SUCCESS);
2356
2357 case 'V':
2358 ovs_print_version(0, 0);
2359 exit(EXIT_SUCCESS);
2360
2361 default:
2362 break;
2363 }
2364 }
2365
ec78987f 2366 if (!ovnsb_db) {
60bdd011 2367 ovnsb_db = default_sb_db();
ac0630a2
RB
2368 }
2369
2370 if (!ovnnb_db) {
60bdd011 2371 ovnnb_db = default_nb_db();
ac0630a2
RB
2372 }
2373
2374 free(short_options);
2375}
2376
5868eb24
BP
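/* Registers 'column' in 'idl' and turns off change alerts for it: ovn-northd
 * writes these southbound columns itself, so it does not need to be woken up
 * when its own updates are read back. */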
2377static void
2378add_column_noalert(struct ovsdb_idl *idl,
2379 const struct ovsdb_idl_column *column)
2380{
2381 ovsdb_idl_add_column(idl, column);
2382 ovsdb_idl_omit_alert(idl, column);
2383}
2384
ac0630a2
RB
2385int
2386main(int argc, char *argv[])
2387{
ac0630a2 2388 int res = EXIT_SUCCESS;
7b303ff9
AW
2389 struct unixctl_server *unixctl;
2390 int retval;
2391 bool exiting;
ac0630a2
RB
2392
2393 fatal_ignore_sigpipe();
2394 set_program_name(argv[0]);
485f0696 2395 service_start(&argc, &argv);
ac0630a2 2396 parse_options(argc, argv);
67d9b930 2397
e91b927d 2398 daemonize_start(false);
7b303ff9
AW
2399
2400 retval = unixctl_server_create(NULL, &unixctl);
2401 if (retval) {
2402 exit(EXIT_FAILURE);
2403 }
2404 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
2405
2406 daemonize_complete();
67d9b930 2407
ac0630a2 2408 nbrec_init();
ec78987f 2409 sbrec_init();
ac0630a2
RB
2410
2411 /* We want to detect all changes to the ovn-nb db. */
331e7aef
NS
2412 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2413 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
2414
2415 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2416 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
2417
2418 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
2419 add_column_noalert(ovnsb_idl_loop.idl,
2420 &sbrec_logical_flow_col_logical_datapath);
2421 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
2422 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
2423 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
2424 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
2425 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
2426
2427 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
2428 add_column_noalert(ovnsb_idl_loop.idl,
2429 &sbrec_multicast_group_col_datapath);
2430 add_column_noalert(ovnsb_idl_loop.idl,
2431 &sbrec_multicast_group_col_tunnel_key);
2432 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
2433 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
2434
2435 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
2436 add_column_noalert(ovnsb_idl_loop.idl,
2437 &sbrec_datapath_binding_col_tunnel_key);
2438 add_column_noalert(ovnsb_idl_loop.idl,
2439 &sbrec_datapath_binding_col_external_ids);
2440
2441 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
2442 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
2443 add_column_noalert(ovnsb_idl_loop.idl,
2444 &sbrec_port_binding_col_logical_port);
2445 add_column_noalert(ovnsb_idl_loop.idl,
2446 &sbrec_port_binding_col_tunnel_key);
2447 add_column_noalert(ovnsb_idl_loop.idl,
2448 &sbrec_port_binding_col_parent_port);
2449 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
2450 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
2451 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
2452 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
2453 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
2454
2455 /* Main loop. */
7b303ff9
AW
2456 exiting = false;
2457 while (!exiting) {
331e7aef
NS
2458 struct northd_context ctx = {
2459 .ovnnb_idl = ovnnb_idl_loop.idl,
2460 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
2461 .ovnsb_idl = ovnsb_idl_loop.idl,
2462 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
2463 };
ac0630a2 2464
8c0fae89
NS
2465 ovnnb_db_run(&ctx);
2466 ovnsb_db_run(&ctx);
f93818dd 2467
331e7aef
NS
2468 unixctl_server_run(unixctl);
2469 unixctl_server_wait(unixctl);
2470 if (exiting) {
2471 poll_immediate_wake();
ac0630a2 2472 }
331e7aef
NS
2473 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
2474 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
ac0630a2 2475
331e7aef 2476 poll_block();
485f0696
GS
2477 if (should_service_stop()) {
2478 exiting = true;
2479 }
ac0630a2
RB
2480 }
2481
7b303ff9 2482 unixctl_server_destroy(unixctl);
331e7aef
NS
2483 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
2484 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
485f0696 2485 service_stop();
ac0630a2 2486
60bdd011
RM
2487 free(default_nb_db_);
2488 free(default_sb_db_);
ac0630a2
RB
2489 exit(res);
2490}
7b303ff9
AW
2491
2492static void
2493ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2494 const char *argv[] OVS_UNUSED, void *exiting_)
2495{
2496 bool *exiting = exiting_;
2497 *exiting = true;
2498
2499 unixctl_command_reply(conn, NULL);
2500}