]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
travis: Update datapath target kernel list.
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
21#include "command-line.h"
67d9b930 22#include "daemon.h"
ac0630a2 23#include "dirs.h"
3e8a2ad1 24#include "openvswitch/dynamic-string.h"
ac0630a2 25#include "fatal-signal.h"
4edcdcf4 26#include "hash.h"
ee89ea7b
TW
27#include "openvswitch/hmap.h"
28#include "openvswitch/json.h"
bd39395f 29#include "ovn/lib/lex.h"
281977f7 30#include "ovn/lib/ovn-dhcp.h"
e3df8838
BP
31#include "ovn/lib/ovn-nb-idl.h"
32#include "ovn/lib/ovn-sb-idl.h"
218351dd 33#include "ovn/lib/ovn-util.h"
064d7f84 34#include "packets.h"
ac0630a2 35#include "poll-loop.h"
5868eb24 36#include "smap.h"
7a15be69 37#include "sset.h"
ac0630a2
RB
38#include "stream.h"
39#include "stream-ssl.h"
7b303ff9 40#include "unixctl.h"
ac0630a2 41#include "util.h"
4edcdcf4 42#include "uuid.h"
ac0630a2
RB
43#include "openvswitch/vlog.h"
44
2e2762d4 45VLOG_DEFINE_THIS_MODULE(ovn_northd);
ac0630a2 46
7b303ff9
AW
47static unixctl_cb_func ovn_northd_exit;
48
/* Shared state for one ovn-northd processing pass: IDL connections and the
 * currently open transactions for the OVN northbound (NB) and southbound (SB)
 * databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound DB connection. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound DB connection. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Open northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Open southbound transaction. */
};
55
ac0630a2 56static const char *ovnnb_db;
ec78987f 57static const char *ovnsb_db;
ac0630a2 58
60bdd011
RM
59static const char *default_nb_db(void);
60static const char *default_sb_db(void);
880fcd14
BP
61\f
62/* Pipeline stages. */
ac0630a2 63
880fcd14
BP
64/* The two pipelines in an OVN logical flow table. */
65enum ovn_pipeline {
66 P_IN, /* Ingress pipeline. */
67 P_OUT /* Egress pipeline. */
68};
091e3af9 69
880fcd14
BP
70/* The two purposes for which ovn-northd uses OVN logical datapaths. */
71enum ovn_datapath_type {
72 DP_SWITCH, /* OVN logical switch. */
73 DP_ROUTER /* OVN logical router. */
091e3af9
JP
74};
75
880fcd14
BP
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.)
 *
 * Layout: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table. */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))

/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
 * S_ROUTER_OUT_DELIVERY.
 *
 * Each PIPELINE_STAGE row is (datapath type, pipeline, stage name, table
 * number, external name).  The external name is what ovn_stage_to_str()
 * returns and what appears in the southbound database. */
enum ovn_stage {
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")     \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")     \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")         \
    PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")          \
    PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")    \
    PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")             \
    PIPELINE_STAGE(SWITCH, IN,  LB,             7, "ls_in_lb")              \
    PIPELINE_STAGE(SWITCH, IN,  STATEFUL,       8, "ls_in_stateful")        \
    PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,     9, "ls_in_arp_rsp")         \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_OPTIONS,  10, "ls_in_dhcp_options")    \
    PIPELINE_STAGE(SWITCH, IN,  DHCP_RESPONSE, 11, "ls_in_dhcp_response")   \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       12, "ls_in_l2_lkup")         \
                                                                      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_LB,         0, "ls_out_pre_lb")         \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,        1, "ls_out_pre_acl")        \
    PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL,   2, "ls_out_pre_stateful")   \
    PIPELINE_STAGE(SWITCH, OUT, LB,             3, "ls_out_lb")             \
    PIPELINE_STAGE(SWITCH, OUT, ACL,            4, "ls_out_acl")            \
    PIPELINE_STAGE(SWITCH, OUT, STATEFUL,       5, "ls_out_stateful")       \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,    6, "ls_out_port_sec_ip")    \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,    7, "ls_out_port_sec_l2")    \
                                                                      \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,      0, "lr_in_admission")       \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,       1, "lr_in_ip_input")        \
    PIPELINE_STAGE(ROUTER, IN,  UNSNAT,         2, "lr_in_unsnat")          \
    PIPELINE_STAGE(ROUTER, IN,  DNAT,           3, "lr_in_dnat")            \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,     4, "lr_in_ip_routing")      \
    PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE,    5, "lr_in_arp_resolve")     \
    PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST,    6, "lr_in_arp_request")     \
                                                                      \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, SNAT,           0, "lr_out_snat")           \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,       1, "lr_out_delivery")

#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
    PIPELINE_STAGES
#undef PIPELINE_STAGE
};
136
6bb4a18e
JP
/* Due to various hard-coded priorities need to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000

/* Symbolic names for scratch bits in logical register 0, used to carry
 * per-packet state between pipeline stages. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT    "reg0[2]"
#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
facf8652 147
880fcd14
BP
148/* Returns an "enum ovn_stage" built from the arguments. */
149static enum ovn_stage
150ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
151 uint8_t table)
152{
153 return OVN_STAGE_BUILD(dp_type, pipeline, table);
154}
155
156/* Returns the pipeline to which 'stage' belongs. */
157static enum ovn_pipeline
158ovn_stage_get_pipeline(enum ovn_stage stage)
159{
160 return (stage >> 8) & 1;
161}
162
163/* Returns the table to which 'stage' belongs. */
164static uint8_t
165ovn_stage_get_table(enum ovn_stage stage)
166{
167 return stage & 0xff;
168}
169
170/* Returns a string name for 'stage'. */
171static const char *
172ovn_stage_to_str(enum ovn_stage stage)
173{
174 switch (stage) {
175#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
176 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
177 PIPELINE_STAGES
178#undef PIPELINE_STAGE
179 default: return "<unknown>";
180 }
181}
9a9961d2
BP
182
183/* Returns the type of the datapath to which a flow with the given 'stage' may
184 * be added. */
185static enum ovn_datapath_type
186ovn_stage_to_datapath_type(enum ovn_stage stage)
187{
188 switch (stage) {
189#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
190 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
191 PIPELINE_STAGES
192#undef PIPELINE_STAGE
193 default: OVS_NOT_REACHED();
194 }
195}
880fcd14 196\f
ac0630a2
RB
/* Prints an ovn-northd usage summary (options and database defaults) to
 * stdout, then appends the shared daemon/vlog/stream option help. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
 (default: %s)\n\
 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
 (default: %s)\n\
 -h, --help display this help message\n\
 -o, --options list available options\n\
 -V, --version display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
217\f
5868eb24
BP
/* One tunnel key (datapath or port) that is in use.  Stored in an hmap used
 * as a set, hashed by hash_int(tnlid, 0). */
struct tnlid_node {
    struct hmap_node hmap_node;
    uint32_t tnlid;
};
222
223static void
224destroy_tnlids(struct hmap *tnlids)
4edcdcf4 225{
4ec3d7c7
DDP
226 struct tnlid_node *node;
227 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
228 free(node);
229 }
230 hmap_destroy(tnlids);
231}
232
233static void
234add_tnlid(struct hmap *set, uint32_t tnlid)
235{
236 struct tnlid_node *node = xmalloc(sizeof *node);
237 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
238 node->tnlid = tnlid;
4edcdcf4
RB
239}
240
4edcdcf4 241static bool
5868eb24 242tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 243{
5868eb24
BP
244 const struct tnlid_node *node;
245 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
246 if (node->tnlid == tnlid) {
247 return true;
248 }
249 }
250 return false;
251}
4edcdcf4 252
5868eb24
BP
253static uint32_t
254allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
255 uint32_t *hint)
256{
257 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
258 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
259 if (!tnlid_in_use(set, tnlid)) {
260 add_tnlid(set, tnlid);
261 *hint = tnlid;
262 return tnlid;
263 }
4edcdcf4
RB
264 }
265
5868eb24
BP
266 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
267 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
268 return 0;
269}
270\f
9975d7be
BP
/* An OVN logical datapath: either a logical switch ('nbs' set) or a logical
 * router ('nbr' set), joined with its southbound Datapath_Binding.
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;  /* This switch's ports of type "router"
                                      * (filled in by join_logical_ports()). */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Set of in-use port tunnel keys
                                 * (struct tnlid_node). */
    uint32_t port_key_hint;     /* Starting hint for port key allocation. */

    /* NOTE(review): presumably set when some switch port has an "unknown"
     * address (cf. mc_unknown); assigned outside this chunk — confirm. */
    bool has_unknown;
};
292
293static struct ovn_datapath *
294ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
9975d7be
BP
295 const struct nbrec_logical_switch *nbs,
296 const struct nbrec_logical_router *nbr,
5868eb24
BP
297 const struct sbrec_datapath_binding *sb)
298{
299 struct ovn_datapath *od = xzalloc(sizeof *od);
300 od->key = *key;
301 od->sb = sb;
9975d7be
BP
302 od->nbs = nbs;
303 od->nbr = nbr;
5868eb24
BP
304 hmap_init(&od->port_tnlids);
305 od->port_key_hint = 0;
306 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
307 return od;
308}
309
310static void
311ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
312{
313 if (od) {
314 /* Don't remove od->list. It is used within build_datapaths() as a
315 * private list and once we've exited that function it is not safe to
316 * use it. */
317 hmap_remove(datapaths, &od->key_node);
318 destroy_tnlids(&od->port_tnlids);
86e98048 319 free(od->router_ports);
5868eb24
BP
320 free(od);
321 }
322}
323
9a9961d2
BP
324/* Returns 'od''s datapath type. */
325static enum ovn_datapath_type
326ovn_datapath_get_type(const struct ovn_datapath *od)
327{
328 return od->nbs ? DP_SWITCH : DP_ROUTER;
329}
330
5868eb24
BP
331static struct ovn_datapath *
332ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
333{
334 struct ovn_datapath *od;
335
336 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
337 if (uuid_equals(uuid, &od->key)) {
338 return od;
339 }
340 }
341 return NULL;
342}
343
344static struct ovn_datapath *
345ovn_datapath_from_sbrec(struct hmap *datapaths,
346 const struct sbrec_datapath_binding *sb)
347{
348 struct uuid key;
349
9975d7be
BP
350 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
351 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
352 return NULL;
353 }
354 return ovn_datapath_find(datapaths, &key);
355}
356
5412db30
J
357static bool
358lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
359{
360 return !lrouter->enabled || *lrouter->enabled;
361}
362
5868eb24
BP
/* Scans the southbound Datapath_Binding table and the northbound
 * Logical_Switch and Logical_Router tables, pairing records that describe the
 * same datapath.  On return:
 *
 *   - 'datapaths' indexes a "struct ovn_datapath" for every datapath by key.
 *   - 'sb_only' lists datapaths with only a southbound record (stale).
 *   - 'nb_only' lists datapaths with only a northbound record (new).
 *   - 'both' lists datapaths present in both databases.
 *
 * Southbound records with missing or duplicate NB UUIDs are deleted here. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Seed 'datapaths' from the southbound table; matched entries move from
     * 'sb_only' to 'both' in the NB passes below. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        /* The owning NB record's UUID is stashed in external-ids; a binding
         * without one cannot be matched, so garbage-collect it. */
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        /* Two bindings claiming the same NB record: keep the first. */
        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Join northbound logical switches. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }

    /* Join northbound logical routers.  Disabled routers are skipped, so
     * their southbound state (if any) stays in 'sb_only' and is removed. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and a router sharing one UUID. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
446
/* Allocates a fresh datapath tunnel key not yet in 'dp_tnlids'.
 * Datapath keys are 24 bits wide. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;       /* Persists across calls to spread keys. */
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
453
0bac7164
BP
454/* Updates the southbound Datapath_Binding table so that it contains the
455 * logical switches and routers specified by the northbound database.
456 *
457 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
458 * switch and router. */
5868eb24
BP
459static void
460build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
461{
462 struct ovs_list sb_only, nb_only, both;
463
464 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
465
417e7e66 466 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
467 /* First index the in-use datapath tunnel IDs. */
468 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
469 struct ovn_datapath *od;
470 LIST_FOR_EACH (od, list, &both) {
471 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
472 }
473
474 /* Add southbound record for each unmatched northbound record. */
475 LIST_FOR_EACH (od, list, &nb_only) {
476 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
477 if (!tunnel_key) {
478 break;
479 }
480
481 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
482
5868eb24 483 char uuid_s[UUID_LEN + 1];
9975d7be
BP
484 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
485 const char *key = od->nbs ? "logical-switch" : "logical-router";
486 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
aaf881c6 487 sbrec_datapath_binding_set_external_ids(od->sb, &id);
5868eb24
BP
488
489 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
490 }
491 destroy_tnlids(&dp_tnlids);
492 }
493
494 /* Delete southbound records without northbound matches. */
495 struct ovn_datapath *od, *next;
496 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 497 ovs_list_remove(&od->list);
5868eb24
BP
498 sbrec_datapath_binding_delete(od->sb);
499 ovn_datapath_destroy(datapaths, od);
500 }
501}
502\f
/* A logical switch port ('nbsp' set) or logical router port ('nbrp' set),
 * joined with its southbound Port_Binding record. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses
                                         * (parsed "addresses" column). */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses
                                         * (parsed "port_security" column). */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks; /* Parsed MAC and networks. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S as its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath that contains this port. */

    struct ovs_list list;       /* In list of similar records. */
};
536
537static struct ovn_port *
538ovn_port_create(struct hmap *ports, const char *key,
0ee00741
HK
539 const struct nbrec_logical_switch_port *nbsp,
540 const struct nbrec_logical_router_port *nbrp,
5868eb24
BP
541 const struct sbrec_port_binding *sb)
542{
543 struct ovn_port *op = xzalloc(sizeof *op);
9975d7be
BP
544
545 struct ds json_key = DS_EMPTY_INITIALIZER;
546 json_string_escape(key, &json_key);
547 op->json_key = ds_steal_cstr(&json_key);
548
549 op->key = xstrdup(key);
5868eb24 550 op->sb = sb;
0ee00741
HK
551 op->nbsp = nbsp;
552 op->nbrp = nbrp;
5868eb24
BP
553 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
554 return op;
555}
556
557static void
558ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
559{
560 if (port) {
561 /* Don't remove port->list. It is used within build_ports() as a
562 * private list and once we've exited that function it is not safe to
563 * use it. */
564 hmap_remove(ports, &port->key_node);
e93b43d6
JP
565
566 for (int i = 0; i < port->n_lsp_addrs; i++) {
567 destroy_lport_addresses(&port->lsp_addrs[i]);
568 }
569 free(port->lsp_addrs);
570
571 for (int i = 0; i < port->n_ps_addrs; i++) {
572 destroy_lport_addresses(&port->ps_addrs[i]);
573 }
574 free(port->ps_addrs);
575
4685e523 576 destroy_lport_addresses(&port->lrp_networks);
9975d7be
BP
577 free(port->json_key);
578 free(port->key);
5868eb24
BP
579 free(port);
580 }
581}
582
583static struct ovn_port *
584ovn_port_find(struct hmap *ports, const char *name)
585{
586 struct ovn_port *op;
587
588 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
589 if (!strcmp(op->key, name)) {
590 return op;
591 }
592 }
593 return NULL;
594}
595
596static uint32_t
597ovn_port_allocate_key(struct ovn_datapath *od)
598{
599 return allocate_tnlid(&od->port_tnlids, "port",
600 (1u << 15) - 1, &od->port_key_hint);
601}
602
/* Pairs southbound Port_Binding records with northbound logical switch and
 * router ports, filling 'ports' and sorting each ovn_port into 'sb_only',
 * 'nb_only', or 'both'.  Also parses each port's addresses and connects peer
 * ports (switch ports of type "router" <-> router ports). */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct ovs_list *sb_only, struct ovs_list *nb_only,
                   struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Seed 'ports' with every southbound binding; NB matches below move
     * entries from 'sb_only' to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: join its ports and parse their addresses. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        /* Same name used twice in the NB database. */
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse the "addresses" column; "unknown" entries are
                 * skipped (they carry no MAC/IP to parse). */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")) {
                        continue;
                    }
                    if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the "port_security" column. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
            }
        } else {
            /* Logical router: join its ports and parse their networks. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A usable router port must have at least one network. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
            /* Switch side: the peer router port is named by
             * options:router-port.  Links both directions. */
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            /* Also record this switch port in its datapath's router-port
             * array (grown one element at a time). */
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;
        } else if (op->nbrp && op->nbrp->peer) {
            /* Router side: the NB 'peer' column may name another router
             * port (two routers connected directly). */
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
770
/* Writes 'op''s northbound configuration into its southbound Port_Binding
 * record: datapath, type, options, and (for switch ports) parent, tag, and
 * MAC columns. */
static void
ovn_port_update_sbrec(const struct ovn_port *op)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "gateway". */
        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
        if (chassis) {
            sbrec_port_binding_set_type(op->sb, "gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        /* Record the peer port name; "<error>" flags a missing peer. */
        const char *peer = op->peer ? op->peer->key : "<error>";
        struct smap new;
        smap_init(&new);
        smap_add(&new, "peer", peer);
        if (chassis) {
            smap_add(&new, "gateway-chassis", chassis);
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Router ports carry no container parent, VLAN tag, or MAC list;
         * clear them in case the binding previously belonged to a switch
         * port. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary switch port: copy type and options through. */
            sbrec_port_binding_set_type(op->sb, op->nbsp->type);
            sbrec_port_binding_set_options(op->sb, &op->nbsp->options);
        } else {
            /* Switch port of type "router": mirror the peer router's
             * gateway status. */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get(&op->nbsp->options,
                                               "router-port");
            if (!router_port) {
                router_port = "<error>";
            }
            struct smap new;
            smap_init(&new);
            smap_add(&new, "peer", router_port);
            if (chassis) {
                smap_add(&new, "gateway-chassis", chassis);
            }
            sbrec_port_binding_set_options(op->sb, &new);
            smap_destroy(&new);
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);
    }
}
836
0bac7164 837/* Updates the southbound Port_Binding table so that it contains the logical
80f408f4 838 * switch ports specified by the northbound database.
0bac7164
BP
839 *
840 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
841 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
842 * datapaths. */
5868eb24
BP
843static void
844build_ports(struct northd_context *ctx, struct hmap *datapaths,
845 struct hmap *ports)
846{
847 struct ovs_list sb_only, nb_only, both;
848
849 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
850
851 /* For logical ports that are in both databases, update the southbound
852 * record based on northbound data. Also index the in-use tunnel_keys. */
853 struct ovn_port *op, *next;
854 LIST_FOR_EACH_SAFE (op, next, list, &both) {
855 ovn_port_update_sbrec(op);
856
857 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
858 if (op->sb->tunnel_key > op->od->port_key_hint) {
859 op->od->port_key_hint = op->sb->tunnel_key;
860 }
861 }
862
863 /* Add southbound record for each unmatched northbound record. */
864 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
865 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
866 if (!tunnel_key) {
867 continue;
868 }
869
870 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
871 ovn_port_update_sbrec(op);
872
873 sbrec_port_binding_set_logical_port(op->sb, op->key);
874 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
875 }
876
877 /* Delete southbound records without northbound matches. */
878 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 879 ovs_list_remove(&op->list);
5868eb24
BP
880 sbrec_port_binding_delete(op->sb);
881 ovn_port_destroy(ports, op);
882 }
883}
884\f
/* Multicast group tunnel keys occupy the top half of the 16-bit key space,
 * above all unicast port keys (which are at most (1u<<15)-1). */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with its fixed tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* NOTE(review): by name, the flood group reaches all ports of a datapath and
 * the unknown group reaches ports with "unknown" addresses; membership is
 * populated outside this chunk — confirm against the callers. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
898
899static bool
900multicast_group_equal(const struct multicast_group *a,
901 const struct multicast_group *b)
902{
903 return !strcmp(a->name, b->name) && a->key == b->key;
904}
905
/* Multicast group entry: the member ports of one multicast group on one
 * datapath. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    struct ovn_port **ports;    /* Member ports (growable array). */
    size_t n_ports, allocated_ports;
};
915
916static uint32_t
917ovn_multicast_hash(const struct ovn_datapath *datapath,
918 const struct multicast_group *group)
919{
920 return hash_pointer(datapath, group->key);
921}
922
923static struct ovn_multicast *
924ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
925 const struct multicast_group *group)
926{
927 struct ovn_multicast *mc;
928
929 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
930 ovn_multicast_hash(datapath, group), mcgroups) {
931 if (mc->datapath == datapath
932 && multicast_group_equal(mc->group, group)) {
933 return mc;
4edcdcf4
RB
934 }
935 }
5868eb24
BP
936 return NULL;
937}
938
939static void
940ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
941 struct ovn_port *port)
942{
943 struct ovn_datapath *od = port->od;
944 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
945 if (!mc) {
946 mc = xmalloc(sizeof *mc);
947 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
948 mc->datapath = od;
949 mc->group = group;
950 mc->n_ports = 0;
951 mc->allocated_ports = 4;
952 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
953 }
954 if (mc->n_ports >= mc->allocated_ports) {
955 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
956 sizeof *mc->ports);
957 }
958 mc->ports[mc->n_ports++] = port;
959}
4edcdcf4 960
5868eb24
BP
961static void
962ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
963{
964 if (mc) {
965 hmap_remove(mcgroups, &mc->hmap_node);
966 free(mc->ports);
967 free(mc);
968 }
969}
4edcdcf4 970
5868eb24
BP
971static void
972ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
973 const struct sbrec_multicast_group *sb)
974{
975 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
976 for (size_t i = 0; i < mc->n_ports; i++) {
977 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
978 }
979 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
980 free(ports);
4edcdcf4 981}
bd39395f 982\f
48605550 983/* Logical flow generation.
bd39395f 984 *
48605550 985 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
986 * function of most of the northbound database.
987 */
988
5868eb24
BP
/* One logical flow: a row-to-be of the southbound Logical_Flow table.
 * 'match' and 'actions' are malloc'ed strings owned by this structure
 * (duplicated in ovn_lflow_add, freed in ovn_lflow_destroy). */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;
    enum ovn_stage stage;
    uint16_t priority;
    char *match;
    char *actions;
};
998
999static size_t
5868eb24 1000ovn_lflow_hash(const struct ovn_lflow *lflow)
bd39395f 1001{
5868eb24 1002 size_t hash = uuid_hash(&lflow->od->key);
880fcd14 1003 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
5868eb24
BP
1004 hash = hash_string(lflow->match, hash);
1005 return hash_string(lflow->actions, hash);
bd39395f
BP
1006}
1007
5868eb24
BP
1008static bool
1009ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1010{
1011 return (a->od == b->od
880fcd14 1012 && a->stage == b->stage
5868eb24
BP
1013 && a->priority == b->priority
1014 && !strcmp(a->match, b->match)
1015 && !strcmp(a->actions, b->actions));
1016}
1017
1018static void
1019ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
880fcd14 1020 enum ovn_stage stage, uint16_t priority,
5868eb24 1021 char *match, char *actions)
bd39395f 1022{
5868eb24 1023 lflow->od = od;
880fcd14 1024 lflow->stage = stage;
5868eb24
BP
1025 lflow->priority = priority;
1026 lflow->match = match;
1027 lflow->actions = actions;
bd39395f
BP
1028}
1029
48605550 1030/* Adds a row with the specified contents to the Logical_Flow table. */
bd39395f 1031static void
5868eb24 1032ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
880fcd14 1033 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1034 const char *match, const char *actions)
1035{
9a9961d2
BP
1036 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
1037
5868eb24 1038 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
880fcd14 1039 ovn_lflow_init(lflow, od, stage, priority,
5868eb24
BP
1040 xstrdup(match), xstrdup(actions));
1041 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
1042}
1043
1044static struct ovn_lflow *
1045ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
880fcd14 1046 enum ovn_stage stage, uint16_t priority,
5868eb24
BP
1047 const char *match, const char *actions)
1048{
1049 struct ovn_lflow target;
880fcd14 1050 ovn_lflow_init(&target, od, stage, priority,
5868eb24
BP
1051 CONST_CAST(char *, match), CONST_CAST(char *, actions));
1052
1053 struct ovn_lflow *lflow;
1054 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
1055 lflows) {
1056 if (ovn_lflow_equal(lflow, &target)) {
1057 return lflow;
bd39395f
BP
1058 }
1059 }
5868eb24
BP
1060 return NULL;
1061}
bd39395f 1062
5868eb24
BP
1063static void
1064ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1065{
1066 if (lflow) {
1067 hmap_remove(lflows, &lflow->hmap_node);
1068 free(lflow->match);
1069 free(lflow->actions);
1070 free(lflow);
1071 }
bd39395f
BP
1072}
1073
/* Appends port security constraints on L2 address field 'eth_addr_field'
 * (e.g. "eth.src" or "eth.dst") to 'match'.  'ps_addrs', with 'n_ps_addrs'
 * elements, is the collection of port_security constraints from an
 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses().
 * With no constraints, 'match' is left untouched. */
static void
build_port_security_l2(const char *eth_addr_field,
                       struct lport_addresses *ps_addrs,
                       unsigned int n_ps_addrs,
                       struct ds *match)
{
    if (!n_ps_addrs) {
        return;
    }

    /* Emit "&& <field> == {a b c}"; the OVN expression parser treats commas
     * between set members as optional, so spaces suffice as separators. */
    ds_put_format(match, " && %s == {", eth_addr_field);

    for (size_t i = 0; i < n_ps_addrs; i++) {
        ds_put_format(match, "%s ", ps_addrs[i].ea_s);
    }
    /* Drop the trailing space before closing the set. */
    ds_chomp(match, ' ');
    ds_put_cstr(match, "}");
}
1096
685f4dfe
NS
/* Appends an IPv6 neighbor-discovery port-security match to 'match':
 * restricts nd.sll and nd.tll to all-zeros or 'ea' and, when
 * 'n_ipv6_addrs' > 0, additionally restricts nd.target to the EUI-64
 * link-local address derived from 'ea' or one of 'ipv6_addrs'.
 *
 * NOTE(review): the format string below opens one more parenthesis before
 * the nd.tll clause than before the nd.sll clause; the early-return path
 * closes two and the nd.target path closes three, which is what keeps the
 * final expression balanced. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No IPv6 addresses: close the match without an nd.target clause. */
        ds_put_cstr(match, "))");
        return;
    }

    /* Always allow the link-local address as an nd.target. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
1127
/* Appends an IPv6 address port-security match to 'match':
 * " && ip6.src == {lla, addrs...}" in the ingress pipeline, or
 * " && ip6.dst == {lla, ff00::/8, addrs...}" in the egress pipeline.
 * The EUI-64 link-local address derived from 'ea' is always allowed. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
1157
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A MAC-only entry constrains both ARP and ND at once. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Replace the trailing ", " by "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND packet that did not match a priority-90
     * allow flow above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
1237
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has IPv4 addresses,
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *
 *   - If the port security has IPv6 addresses,
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both
 *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* Select the direction-specific field name and pipeline stage. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only port security entries put no constraints on IP. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress additionally allows broadcast and multicast
                 * destinations. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address. If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Default-deny for this MAC: any other IP traffic is dropped. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }

}
1364
95a9a275 1365static bool
80f408f4 1366lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 1367{
80f408f4 1368 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
1369}
1370
4c7bf534 1371static bool
80f408f4 1372lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 1373{
80f408f4 1374 return !lsp->up || *lsp->up;
4c7bf534
NS
1375}
1376
281977f7
NS
/* Builds the native DHCPv4 actions for logical port 'op' offering
 * 'offer_ip': 'options_action' receives a put_dhcp_opts() action for the
 * request path and 'response_action' receives the action sequence that
 * turns the packet around as the DHCP reply.  Returns false, leaving both
 * actions untouched, when DHCPv4 is disabled or misconfigured for 'op'. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
       /* Either
        *  - cidr defined is invalid or
        *  - the offer ip of the logical port doesn't belong to the cidr
        *    defined in the DHCPv4 options.
        *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");
    const char *router = smap_get(
        &op->nbsp->dhcpv4_options->options, "router");

    if (!(server_ip && server_mac && lease_time && router)) {
        /* "server_id", "server_mac", "lease_time" and "router" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    /* The netmask option is derived from the configured cidr. */
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    /* Emit "put_dhcp_opts(offerip = ..., key = value, ...); next;". */
    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));
    struct smap_node *node;
    SMAP_FOR_EACH(node, &dhcpv4_options) {
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }

    /* Drop the trailing ", " before closing the action. */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    /* Swap L2/L3 addressing and send the reply back out the ingress port. */
    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; inport = \"\";"
                  " /* Allow sending out inport. */ output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    smap_destroy(&dhcpv4_options);
    return true;
}
1447
78aab811
JP
1448static bool
1449has_stateful_acl(struct ovn_datapath *od)
1450{
9975d7be
BP
1451 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1452 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
1453 if (!strcmp(acl->action, "allow-related")) {
1454 return true;
1455 }
1456 }
1457
1458 return false;
1459}
1460
/* Populates the ingress and egress Pre-ACL stages for datapath 'od'.
 * 'ports' is the global port map, scanned for router ports on 'od'. */
static void
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
               struct hmap *ports)
{
    bool has_stateful = has_stateful_acl(od);
    struct ovn_port *op;

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    if (has_stateful) {
        HMAP_FOR_EACH (op, key_node, ports) {
            if (op->od == od && !strcmp(op->nbsp->type, "router")) {
                /* Can't use ct() for router ports.  Consider the
                 * following configuration: lp1(10.0.0.2) on
                 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
                 * ping from lp1 to lp2, First, the response will go
                 * through ct() with a zone for lp2 in the ls2 ingress
                 * pipeline on hostB.  That ct zone knows about this
                 * connection.  Next, it goes through ct() with the zone
                 * for the router port in the egress pipeline of ls2 on
                 * hostB.  This zone does not know about the connection,
                 * as the icmp request went through the logical router
                 * on hostA, not hostB.  This would only work with
                 * distributed conntrack state across all chassis. */
                struct ds match_in = DS_EMPTY_INITIALIZER;
                struct ds match_out = DS_EMPTY_INITIALIZER;

                ds_put_format(&match_in, "ip && inport == %s", op->json_key);
                ds_put_format(&match_out, "ip && outport == %s", op->json_key);
                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                              ds_cstr(&match_in), "next;");
                ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                              ds_cstr(&match_out), "next;");

                ds_destroy(&match_in);
                ds_destroy(&match_out);
            }
        }
        /* Ingress and Egress Pre-ACL Table (Priority 110).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed.
         *
         * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * it to conntrack for tracking and defragmentation. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
    }
}
78aab811 1525
7a15be69
GS
1526/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
1527 * 'ip_address'. The caller must free() the memory allocated for
1528 * 'ip_address'. */
1529static void
1530ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1531 uint16_t *port)
1532{
1533 char *ip_str, *start, *next;
1534 *ip_address = NULL;
1535 *port = 0;
1536
1537 next = start = xstrdup(key);
1538 ip_str = strsep(&next, ":");
1539 if (!ip_str || !ip_str[0]) {
1540 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1541 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1542 free(start);
1543 return;
1544 }
1545
1546 ovs_be32 ip, mask;
1547 char *error = ip_parse_masked(ip_str, &ip, &mask);
1548 if (error || mask != OVS_BE32_MAX) {
1549 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1550 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1551 free(start);
1552 free(error);
1553 return;
1554 }
1555
1556 int l4_port = 0;
1557 if (next && next[0]) {
1558 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
1559 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1560 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
1561 free(start);
1562 return;
1563 }
1564 }
1565
1566 *port = l4_port;
1567 *ip_address = strdup(ip_str);
1568 free(start);
1569}
1570
/* Populates the ingress and egress Pre-LB stages for datapath 'od':
 * defaults to "next;", and for each configured load-balancer VIP sets
 * REGBIT_CONNTRACK_DEFRAG so the pre-stateful stage defragments the
 * packet before L4 matching. */
static void
build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Allow all packets to go to next tables by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");

    /* NOTE: sset_destroy() below is only reached when a load balancer is
     * configured; an sset initializer allocates nothing, so this does not
     * leak when 'od' has no load balancer. */
    struct sset all_ips = SSET_INITIALIZER(&all_ips);
    if (od->nbs->load_balancer) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer;
        struct smap *vips = &lb->vips;
        struct smap_node *node;
        bool vip_configured = false;

        SMAP_FOR_EACH (node, vips) {
            vip_configured = true;

            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            uint16_t port;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
            if (!ip_address) {
                continue;
            }

            /* Collect each VIP address once, even if it appears with
             * several ports. */
            if (!sset_contains(&all_ips, ip_address)) {
                sset_add(&all_ips, ip_address);
            }

            free(ip_address);

            /* Ignore L4 port information in the key because fragmented packets
             * may not have L4 information.  The pre-stateful table will send
             * the packet through ct() action to de-fragment.  In stateful
             * table, we will eventually look at L4 information. */
        }

        /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * packet to conntrack for defragmentation. */
        const char *ip_address;
        SSET_FOR_EACH(ip_address, &all_ips) {
            char *match = xasprintf("ip && ip4.dst == %s", ip_address);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
                          100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
            free(match);
        }

        sset_destroy(&all_ips);

        if (vip_configured) {
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
                          100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        }
    }
}
1626
facf8652
GS
1627static void
1628build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
1629{
1630 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
1631 * allowed by default. */
1632 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
1633 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
1634
1635 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
1636 * sent to conntrack for tracking and defragmentation. */
1637 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
1638 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1639 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
1640 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1641}
1642
2d018f9b
GS
1643static void
1644build_acls(struct ovn_datapath *od, struct hmap *lflows)
1645{
1646 bool has_stateful = has_stateful_acl(od);
e75451fe 1647
2d018f9b
GS
1648 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1649 * default. A related rule at priority 1 is added below if there
1650 * are any stateful ACLs in this datapath. */
1651 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1652 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1653
1654 if (has_stateful) {
78aab811
JP
1655 /* Ingress and Egress ACL Table (Priority 1).
1656 *
1657 * By default, traffic is allowed. This is partially handled by
1658 * the Priority 0 ACL flows added earlier, but we also need to
1659 * commit IP flows. This is because, while the initiater's
1660 * direction may not have any stateful rules, the server's may
1661 * and then its return traffic would not have an associated
cc58e1f2
RB
1662 * conntrack entry and would return "+invalid".
1663 *
1664 * We use "ct_commit" for a connection that is not already known
1665 * by the connection tracker. Once a connection is committed,
1666 * subsequent packets will hit the flow at priority 0 that just
1667 * uses "next;"
1668 *
1669 * We also check for established connections that have ct_label[0]
1670 * set on them. That's a connection that was disallowed, but is
1671 * now allowed by policy again since it hit this default-allow flow.
1672 * We need to set ct_label[0]=0 to let the connection continue,
1673 * which will be done by ct_commit() in the "stateful" stage.
1674 * Subsequent packets will hit the flow at priority 0 that just
1675 * uses "next;". */
1676 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
1677 "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
1678 REGBIT_CONNTRACK_COMMIT" = 1; next;");
1679 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
1680 "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
1681 REGBIT_CONNTRACK_COMMIT" = 1; next;");
78aab811
JP
1682
1683 /* Ingress and Egress ACL Table (Priority 65535).
1684 *
cc58e1f2
RB
1685 * Always drop traffic that's in an invalid state. Also drop
1686 * reply direction packets for connections that have been marked
1687 * for deletion (bit 0 of ct_label is set).
1688 *
1689 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 1690 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
1691 "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
1692 "drop;");
880fcd14 1693 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
1694 "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
1695 "drop;");
78aab811
JP
1696
1697 /* Ingress and Egress ACL Table (Priority 65535).
1698 *
cc58e1f2
RB
1699 * Allow reply traffic that is part of an established
1700 * conntrack entry that has not been marked for deletion
1701 * (bit 0 of ct_label). We only match traffic in the
1702 * reply direction because we want traffic in the request
1703 * direction to hit the currently defined policy from ACLs.
1704 *
1705 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 1706 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
1707 "ct.est && !ct.rel && !ct.new && !ct.inv "
1708 "&& ct.rpl && ct_label[0] == 0",
78aab811 1709 "next;");
880fcd14 1710 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
1711 "ct.est && !ct.rel && !ct.new && !ct.inv "
1712 "&& ct.rpl && ct_label[0] == 0",
78aab811
JP
1713 "next;");
1714
1715 /* Ingress and Egress ACL Table (Priority 65535).
1716 *
cc58e1f2
RB
1717 * Allow traffic that is related to an existing conntrack entry that
1718 * has not been marked for deletion (bit 0 of ct_label).
1719 *
1720 * This is enforced at a higher priority than ACLs can be defined.
78aab811
JP
1721 *
1722 * NOTE: This does not support related data sessions (eg,
1723 * a dynamically negotiated FTP data channel), but will allow
1724 * related traffic such as an ICMP Port Unreachable through
1725 * that's generated from a non-listening UDP port. */
880fcd14 1726 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2
RB
1727 "!ct.est && ct.rel && !ct.new && !ct.inv "
1728 "&& ct_label[0] == 0",
78aab811 1729 "next;");
880fcd14 1730 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2
RB
1731 "!ct.est && ct.rel && !ct.new && !ct.inv "
1732 "&& ct_label[0] == 0",
78aab811 1733 "next;");
e75451fe
ZKL
1734
1735 /* Ingress and Egress ACL Table (Priority 65535).
1736 *
1737 * Not to do conntrack on ND packets. */
1738 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
1739 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
78aab811
JP
1740 }
1741
1742 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
1743 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1744 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 1745 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 1746 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811 1747
cc58e1f2
RB
1748 if (!strcmp(acl->action, "allow")
1749 || !strcmp(acl->action, "allow-related")) {
78aab811
JP
1750 /* If there are any stateful flows, we must even commit "allow"
1751 * actions. This is because, while the initiater's
1752 * direction may not have any stateful rules, the server's
1753 * may and then its return traffic would not have an
1754 * associated conntrack entry and would return "+invalid". */
cc58e1f2
RB
1755 if (!has_stateful) {
1756 ovn_lflow_add(lflows, od, stage,
1757 acl->priority + OVN_ACL_PRI_OFFSET,
1758 acl->match, "next;");
1759 } else {
1760 struct ds match = DS_EMPTY_INITIALIZER;
1761
1762 /* Commit the connection tracking entry if it's a new
1763 * connection that matches this ACL. After this commit,
1764 * the reply traffic is allowed by a flow we create at
1765 * priority 65535, defined earlier.
1766 *
1767 * It's also possible that a known connection was marked for
1768 * deletion after a policy was deleted, but the policy was
1769 * re-added while that connection is still known. We catch
1770 * that case here and un-set ct_label[0] (which will be done
1771 * by ct_commit in the "stateful" stage) to indicate that the
1772 * connection should be allowed to resume.
1773 */
1774 ds_put_format(&match, "((ct.new && !ct.est)"
1775 " || (!ct.new && ct.est && !ct.rpl "
1776 "&& ct_label[0] == 1)) "
1777 "&& (%s)", acl->match);
1778 ovn_lflow_add(lflows, od, stage,
1779 acl->priority + OVN_ACL_PRI_OFFSET,
1780 ds_cstr(&match),
1781 REGBIT_CONNTRACK_COMMIT" = 1; next;");
1782
1783 /* Match on traffic in the request direction for an established
1784 * connection tracking entry that has not been marked for
1785 * deletion. There is no need to commit here, so we can just
1786 * proceed to the next table. We use this to ensure that this
1787 * connection is still allowed by the currently defined
1788 * policy. */
1789 ds_clear(&match);
1790 ds_put_format(&match,
1791 "!ct.new && ct.est && !ct.rpl"
1792 " && ct_label[0] == 0 && (%s)",
1793 acl->match);
1794 ovn_lflow_add(lflows, od, stage,
1795 acl->priority + OVN_ACL_PRI_OFFSET,
1796 ds_cstr(&match), "next;");
1797
1798 ds_destroy(&match);
1799 }
1800 } else if (!strcmp(acl->action, "drop")
1801 || !strcmp(acl->action, "reject")) {
78aab811
JP
1802 struct ds match = DS_EMPTY_INITIALIZER;
1803
cc58e1f2
RB
1804 /* XXX Need to support "reject", treat it as "drop;" for now. */
1805 if (!strcmp(acl->action, "reject")) {
1806 VLOG_INFO("reject is not a supported action");
1807 }
78aab811 1808
cc58e1f2
RB
1809 /* The implementation of "drop" differs if stateful ACLs are in
1810 * use for this datapath. In that case, the actions differ
1811 * depending on whether the connection was previously committed
1812 * to the connection tracker with ct_commit. */
1813 if (has_stateful) {
1814 /* If the packet is not part of an established connection, then
1815 * we can simply drop it. */
1816 ds_put_format(&match,
1817 "(!ct.est || (ct.est && ct_label[0] == 1)) "
1818 "&& (%s)",
1819 acl->match);
1820 ovn_lflow_add(lflows, od, stage, acl->priority +
1821 OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");
1822
1823 /* For an existing connection without ct_label set, we've
1824 * encountered a policy change. ACLs previously allowed
1825 * this connection and we committed the connection tracking
1826 * entry. Current policy says that we should drop this
1827 * connection. First, we set bit 0 of ct_label to indicate
1828 * that this connection is set for deletion. By not
1829 * specifying "next;", we implicitly drop the packet after
1830 * updating conntrack state. We would normally defer
1831 * ct_commit() to the "stateful" stage, but since we're
1832 * dropping the packet, we go ahead and do it here. */
1833 ds_clear(&match);
1834 ds_put_format(&match,
1835 "ct.est && ct_label[0] == 0 && (%s)",
1836 acl->match);
1837 ovn_lflow_add(lflows, od, stage,
1838 acl->priority + OVN_ACL_PRI_OFFSET,
1839 ds_cstr(&match), "ct_commit(ct_label=1/1);");
1840
1841 ds_destroy(&match);
1842 } else {
1843 /* There are no stateful ACLs in use on this datapath,
1844 * so a "drop" ACL is simply the "drop" logical flow action
1845 * in all cases. */
1846 ovn_lflow_add(lflows, od, stage,
1847 acl->priority + OVN_ACL_PRI_OFFSET,
1848 acl->match, "drop;");
1849 }
78aab811
JP
1850 }
1851 }
281977f7
NS
1852
1853 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
1854 * logical ports of the datapath if the CMS has configured DHCPv4 options*/
1855 if (od->nbs && od->nbs->n_ports) {
1856 for (size_t i = 0; i < od->nbs->n_ports; i++) {
1857 if (od->nbs->ports[i]->dhcpv4_options) {
1858 const char *server_id = smap_get(
1859 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
1860 const char *server_mac = smap_get(
1861 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
1862 const char *lease_time = smap_get(
1863 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
1864 const char *router = smap_get(
1865 &od->nbs->ports[i]->dhcpv4_options->options, "router");
1866 if (server_id && server_mac && lease_time && router) {
1867 struct ds match = DS_EMPTY_INITIALIZER;
1868 const char *actions =
1869 has_stateful ? "ct_commit; next;" : "next;";
1870 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
1871 "&& ip4.src == %s && udp && udp.src == 67 "
1872 "&& udp.dst == 68", od->nbs->ports[i]->name,
1873 server_mac, server_id);
1874 ovn_lflow_add(
1875 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
1876 actions);
1877 }
1878 }
1879 }
1880 }
78aab811
JP
1881}
1882
7a15be69
GS
1883static void
1884build_lb(struct ovn_datapath *od, struct hmap *lflows)
1885{
1886 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
1887 * default. */
1888 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
1889 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
1890
1891 if (od->nbs->load_balancer) {
1892 /* Ingress and Egress LB Table (Priority 65535).
1893 *
1894 * Send established traffic through conntrack for just NAT. */
1895 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
1896 "ct.est && !ct.rel && !ct.new && !ct.inv",
1897 REGBIT_CONNTRACK_NAT" = 1; next;");
1898 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
1899 "ct.est && !ct.rel && !ct.new && !ct.inv",
1900 REGBIT_CONNTRACK_NAT" = 1; next;");
1901 }
1902}
1903
fa313a8c
GS
1904static void
1905build_stateful(struct ovn_datapath *od, struct hmap *lflows)
1906{
1907 /* Ingress and Egress stateful Table (Priority 0): Packets are
1908 * allowed by default. */
1909 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
1910 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
1911
1912 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
cc58e1f2
RB
1913 * committed to conntrack. We always set ct_label[0] to 0 here as
1914 * any packet that makes it this far is part of a connection we
1915 * want to allow to continue. */
fa313a8c 1916 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
cc58e1f2 1917 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
fa313a8c 1918 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
cc58e1f2 1919 REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
7a15be69
GS
1920
1921 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
1922 * through nat (without committing).
1923 *
1924 * REGBIT_CONNTRACK_COMMIT is set for new connections and
1925 * REGBIT_CONNTRACK_NAT is set for established connections. So they
1926 * don't overlap.
1927 */
1928 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
1929 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
1930 ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
1931 REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
1932
1933 /* Load balancing rules for new connections get committed to conntrack
1934 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
1935 * a higher priority rule for load balancing below also commits the
1936 * connection, so it is okay if we do not hit the above match on
1937 * REGBIT_CONNTRACK_COMMIT. */
1938 if (od->nbs->load_balancer) {
1939 struct nbrec_load_balancer *lb = od->nbs->load_balancer;
1940 struct smap *vips = &lb->vips;
1941 struct smap_node *node;
1942
1943 SMAP_FOR_EACH (node, vips) {
1944 uint16_t port = 0;
1945
1946 /* node->key contains IP:port or just IP. */
1947 char *ip_address = NULL;
1948 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1949 if (!ip_address) {
1950 continue;
1951 }
1952
1953 /* New connections in Ingress table. */
1954 char *action = xasprintf("ct_lb(%s);", node->value);
1955 struct ds match = DS_EMPTY_INITIALIZER;
1956 ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
1957 if (port) {
1958 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
1959 ds_put_format(&match, "&& udp && udp.dst == %d", port);
1960 } else {
1961 ds_put_format(&match, "&& tcp && tcp.dst == %d", port);
1962 }
1963 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
1964 120, ds_cstr(&match), action);
1965 } else {
1966 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
1967 110, ds_cstr(&match), action);
1968 }
1969
1970 ds_destroy(&match);
1971 free(action);
1972 }
1973 }
fa313a8c
GS
1974}
1975
bd39395f 1976static void
9975d7be
BP
1977build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1978 struct hmap *lflows, struct hmap *mcgroups)
bd39395f 1979{
5cff6b99
BP
1980 /* This flow table structure is documented in ovn-northd(8), so please
1981 * update ovn-northd.8.xml if you change anything. */
1982
09b39248
JP
1983 struct ds match = DS_EMPTY_INITIALIZER;
1984 struct ds actions = DS_EMPTY_INITIALIZER;
1985
9975d7be 1986 /* Build pre-ACL and ACL tables for both ingress and egress.
685f4dfe 1987 * Ingress tables 3 and 4. Egress tables 0 and 1. */
5868eb24
BP
1988 struct ovn_datapath *od;
1989 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
1990 if (!od->nbs) {
1991 continue;
1992 }
1993
2d018f9b 1994 build_pre_acls(od, lflows, ports);
7a15be69 1995 build_pre_lb(od, lflows);
facf8652 1996 build_pre_stateful(od, lflows);
2d018f9b 1997 build_acls(od, lflows);
7a15be69 1998 build_lb(od, lflows);
fa313a8c 1999 build_stateful(od, lflows);
9975d7be
BP
2000 }
2001
2002 /* Logical switch ingress table 0: Admission control framework (priority
2003 * 100). */
2004 HMAP_FOR_EACH (od, key_node, datapaths) {
2005 if (!od->nbs) {
2006 continue;
2007 }
2008
bd39395f 2009 /* Logical VLANs not supported. */
685f4dfe 2010 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
091e3af9 2011 "drop;");
bd39395f
BP
2012
2013 /* Broadcast/multicast source address is invalid. */
685f4dfe 2014 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
091e3af9 2015 "drop;");
bd39395f 2016
35060cdc
BP
2017 /* Port security flows have priority 50 (see below) and will continue
2018 * to the next table if packet source is acceptable. */
bd39395f
BP
2019 }
2020
685f4dfe
NS
2021 /* Logical switch ingress table 0: Ingress port security - L2
2022 * (priority 50).
2023 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
2024 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
2025 */
5868eb24
BP
2026 struct ovn_port *op;
2027 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2028 if (!op->nbsp) {
9975d7be
BP
2029 continue;
2030 }
2031
0ee00741 2032 if (!lsp_is_enabled(op->nbsp)) {
96af668a
BP
2033 /* Drop packets from disabled logical ports (since logical flow
2034 * tables are default-drop). */
2035 continue;
2036 }
2037
09b39248 2038 ds_clear(&match);
9975d7be 2039 ds_put_format(&match, "inport == %s", op->json_key);
e93b43d6
JP
2040 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
2041 &match);
685f4dfe 2042 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
96af668a 2043 ds_cstr(&match), "next;");
685f4dfe 2044
0ee00741 2045 if (op->nbsp->n_port_security) {
685f4dfe
NS
2046 build_port_security_ip(P_IN, op, lflows);
2047 build_port_security_nd(op, lflows);
2048 }
2049 }
2050
2051 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
2052 * (priority 0)*/
2053 HMAP_FOR_EACH (od, key_node, datapaths) {
2054 if (!od->nbs) {
2055 continue;
2056 }
2057
2058 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
2059 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
5868eb24 2060 }
445a266a 2061
281977f7
NS
2062 /* Ingress table 9: ARP/ND responder, skip requests coming from localnet
2063 * ports. (priority 100). */
fa128126 2064 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2065 if (!op->nbsp) {
fa128126
HZ
2066 continue;
2067 }
2068
0ee00741 2069 if (!strcmp(op->nbsp->type, "localnet")) {
09b39248
JP
2070 ds_clear(&match);
2071 ds_put_format(&match, "inport == %s", op->json_key);
e75451fe 2072 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
09b39248 2073 ds_cstr(&match), "next;");
fa128126
HZ
2074 }
2075 }
2076
94300e09 2077 /* Ingress table 9: ARP/ND responder, reply for known IPs.
fa128126 2078 * (priority 50). */
57d143eb 2079 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2080 if (!op->nbsp) {
57d143eb
HZ
2081 continue;
2082 }
2083
4c7bf534 2084 /*
e75451fe 2085 * Add ARP/ND reply flows if either the
4c7bf534
NS
2086 * - port is up or
2087 * - port type is router
2088 */
0ee00741 2089 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
4c7bf534
NS
2090 continue;
2091 }
2092
e93b43d6
JP
2093 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2094 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
09b39248 2095 ds_clear(&match);
e93b43d6
JP
2096 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
2097 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
09b39248
JP
2098 ds_clear(&actions);
2099 ds_put_format(&actions,
57d143eb 2100 "eth.dst = eth.src; "
e93b43d6 2101 "eth.src = %s; "
57d143eb
HZ
2102 "arp.op = 2; /* ARP reply */ "
2103 "arp.tha = arp.sha; "
e93b43d6 2104 "arp.sha = %s; "
57d143eb 2105 "arp.tpa = arp.spa; "
e93b43d6 2106 "arp.spa = %s; "
57d143eb
HZ
2107 "outport = inport; "
2108 "inport = \"\"; /* Allow sending out inport. */ "
2109 "output;",
e93b43d6
JP
2110 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
2111 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
e75451fe 2112 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 2113 ds_cstr(&match), ds_cstr(&actions));
57d143eb 2114 }
7dc88496 2115
e93b43d6 2116 if (op->lsp_addrs[i].n_ipv6_addrs > 0) {
09b39248 2117 ds_clear(&match);
e75451fe 2118 ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
e93b43d6
JP
2119 if (op->lsp_addrs[i].n_ipv6_addrs == 1) {
2120 ds_put_format(&match, "nd.target == %s",
2121 op->lsp_addrs[i].ipv6_addrs[0].addr_s);
e75451fe 2122 } else {
e93b43d6
JP
2123 ds_put_format(&match, "nd.target == {");
2124 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
2defb84e 2125 ds_put_format(&match, "%s, ",
e93b43d6 2126 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
e75451fe
ZKL
2127 }
2128 ds_chomp(&match, ' ');
e93b43d6
JP
2129 ds_chomp(&match, ',');
2130 ds_put_cstr(&match, "}");
e75451fe 2131 }
09b39248
JP
2132 ds_clear(&actions);
2133 ds_put_format(&actions,
e93b43d6
JP
2134 "na { eth.src = %s; "
2135 "nd.tll = %s; "
e75451fe
ZKL
2136 "outport = inport; "
2137 "inport = \"\"; /* Allow sending out inport. */ "
2138 "output; };",
e93b43d6
JP
2139 op->lsp_addrs[i].ea_s,
2140 op->lsp_addrs[i].ea_s);
e75451fe
ZKL
2141
2142 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
09b39248 2143 ds_cstr(&match), ds_cstr(&actions));
e75451fe 2144
e75451fe 2145 }
57d143eb
HZ
2146 }
2147 }
2148
94300e09 2149 /* Ingress table 9: ARP/ND responder, by default goto next.
fa128126
HZ
2150 * (priority 0)*/
2151 HMAP_FOR_EACH (od, key_node, datapaths) {
2152 if (!od->nbs) {
2153 continue;
2154 }
2155
e75451fe 2156 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
fa128126
HZ
2157 }
2158
281977f7
NS
2159 /* Logical switch ingress table 10 and 11: DHCP options and response
2160 * priority 100 flows. */
2161 HMAP_FOR_EACH (op, key_node, ports) {
2162 if (!op->nbsp) {
2163 continue;
2164 }
2165
2166 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
2167 /* Don't add the DHCP flows if the port is not enabled or if the
2168 * port is a router port. */
2169 continue;
2170 }
2171
2172 if (!op->nbsp->dhcpv4_options) {
2173 /* CMS has disabled native DHCPv4 for this lport. */
2174 continue;
2175 }
2176
2177 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2178 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2179 struct ds options_action = DS_EMPTY_INITIALIZER;
2180 struct ds response_action = DS_EMPTY_INITIALIZER;
2181 if (build_dhcpv4_action(
2182 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
2183 &options_action, &response_action)) {
2184 struct ds match = DS_EMPTY_INITIALIZER;
2185 ds_put_format(
2186 &match, "inport == %s && eth.src == %s && "
2187 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
2188 "udp.src == 68 && udp.dst == 67", op->json_key,
2189 op->lsp_addrs[i].ea_s);
2190
2191 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
2192 100, ds_cstr(&match),
2193 ds_cstr(&options_action));
2194 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
2195 * put_dhcp_opts action is successful */
2196 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
2197 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
2198 100, ds_cstr(&match),
2199 ds_cstr(&response_action));
2200 ds_destroy(&match);
2201 ds_destroy(&options_action);
2202 ds_destroy(&response_action);
2203 break;
2204 }
2205 }
2206 }
2207 }
2208
2209 /* Ingress table 10 and 11: DHCP options and response, by default goto next.
2210 * (priority 0). */
2211
2212 HMAP_FOR_EACH (od, key_node, datapaths) {
2213 if (!od->nbs) {
2214 continue;
2215 }
2216
2217 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
2218 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
2219 }
2220
2221 /* Ingress table 12: Destination lookup, broadcast and multicast handling
5868eb24
BP
2222 * (priority 100). */
2223 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2224 if (!op->nbsp) {
9975d7be
BP
2225 continue;
2226 }
2227
0ee00741 2228 if (lsp_is_enabled(op->nbsp)) {
9975d7be 2229 ovn_multicast_add(mcgroups, &mc_flood, op);
445a266a 2230 }
5868eb24
BP
2231 }
2232 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2233 if (!od->nbs) {
2234 continue;
2235 }
2236
2237 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
5868eb24 2238 "outport = \""MC_FLOOD"\"; output;");
bd39395f 2239 }
bd39395f 2240
281977f7 2241 /* Ingress table 12: Destination lookup, unicast handling (priority 50), */
5868eb24 2242 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2243 if (!op->nbsp) {
9975d7be
BP
2244 continue;
2245 }
2246
0ee00741 2247 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
74ff3298 2248 struct eth_addr mac;
5868eb24 2249
0ee00741 2250 if (eth_addr_from_string(op->nbsp->addresses[i], &mac)) {
09b39248 2251 ds_clear(&match);
9975d7be
BP
2252 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
2253 ETH_ADDR_ARGS(mac));
5868eb24 2254
09b39248 2255 ds_clear(&actions);
9975d7be
BP
2256 ds_put_format(&actions, "outport = %s; output;", op->json_key);
2257 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
5868eb24 2258 ds_cstr(&match), ds_cstr(&actions));
0ee00741
HK
2259 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
2260 if (lsp_is_enabled(op->nbsp)) {
9975d7be 2261 ovn_multicast_add(mcgroups, &mc_unknown, op);
96af668a
BP
2262 op->od->has_unknown = true;
2263 }
5868eb24
BP
2264 } else {
2265 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
445a266a 2266
2fa326a3
BP
2267 VLOG_INFO_RL(&rl,
2268 "%s: invalid syntax '%s' in addresses column",
0ee00741 2269 op->nbsp->name, op->nbsp->addresses[i]);
445a266a
BP
2270 }
2271 }
bd39395f
BP
2272 }
2273
281977f7 2274 /* Ingress table 12: Destination lookup for unknown MACs (priority 0). */
5868eb24 2275 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2276 if (!od->nbs) {
2277 continue;
2278 }
2279
5868eb24 2280 if (od->has_unknown) {
9975d7be 2281 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
5868eb24 2282 "outport = \""MC_UNKNOWN"\"; output;");
445a266a 2283 }
bd39395f
BP
2284 }
2285
94300e09
JP
2286 /* Egress tables 6: Egress port security - IP (priority 0)
2287 * Egress table 7: Egress port security L2 - multicast/broadcast
2288 * (priority 100). */
5868eb24 2289 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2290 if (!od->nbs) {
2291 continue;
2292 }
2293
685f4dfe
NS
2294 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
2295 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
091e3af9 2296 "output;");
48f42f3a
RB
2297 }
2298
94300e09 2299 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
685f4dfe
NS
2300 * if port security enabled.
2301 *
94300e09 2302 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
d770a830
BP
2303 *
2304 * Priority 50 rules implement port security for enabled logical port.
2305 *
2306 * Priority 150 rules drop packets to disabled logical ports, so that they
2307 * don't even receive multicast or broadcast packets. */
5868eb24 2308 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2309 if (!op->nbsp) {
9975d7be
BP
2310 continue;
2311 }
2312
09b39248 2313 ds_clear(&match);
9975d7be 2314 ds_put_format(&match, "outport == %s", op->json_key);
0ee00741 2315 if (lsp_is_enabled(op->nbsp)) {
e93b43d6
JP
2316 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
2317 &match);
685f4dfe 2318 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
d770a830
BP
2319 ds_cstr(&match), "output;");
2320 } else {
685f4dfe 2321 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
d770a830
BP
2322 ds_cstr(&match), "drop;");
2323 }
eb00399e 2324
0ee00741 2325 if (op->nbsp->n_port_security) {
685f4dfe
NS
2326 build_port_security_ip(P_OUT, op, lflows);
2327 }
eb00399e 2328 }
09b39248
JP
2329
2330 ds_destroy(&match);
2331 ds_destroy(&actions);
9975d7be 2332}
eb00399e 2333
9975d7be
BP
2334static bool
2335lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
2336{
2337 return !lrport->enabled || *lrport->enabled;
2338}
2339
4685e523
JP
2340/* Returns a string of the IP address of the router port 'op' that
2341 * overlaps with 'ip_s". If one is not found, returns NULL.
2342 *
2343 * The caller must not free the returned string. */
2344static const char *
2345find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
2346{
6026f534 2347 ovs_be32 ip;
4685e523
JP
2348
2349 if (!ip_parse(ip_s, &ip)) {
2350 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2351 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
2352 return NULL;
2353 }
2354
2355 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2356 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
2357
2358 if (!((na->network ^ ip) & na->mask)) {
2359 /* There should be only 1 interface that matches the
2360 * next hop. Otherwise, it's a configuration error,
2361 * because subnets of router's interfaces should NOT
2362 * overlap. */
2363 return na->addr_s;
2364 }
2365 }
2366
2367 return NULL;
2368}
2369
9975d7be 2370static void
0bac7164 2371add_route(struct hmap *lflows, const struct ovn_port *op,
4685e523
JP
2372 const char *lrp_addr_s, const char *network_s, int plen,
2373 const char *gateway)
9975d7be 2374{
c9bdf7bd 2375 char *match = xasprintf("ip4.dst == %s/%d", network_s, plen);
9975d7be
BP
2376
2377 struct ds actions = DS_EMPTY_INITIALIZER;
47f3b59b 2378 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
9975d7be 2379 if (gateway) {
c9bdf7bd 2380 ds_put_cstr(&actions, gateway);
9975d7be
BP
2381 } else {
2382 ds_put_cstr(&actions, "ip4.dst");
2383 }
4685e523 2384 ds_put_format(&actions, "; "
c9bdf7bd 2385 "reg1 = %s; "
4685e523 2386 "eth.src = %s; "
0bac7164 2387 "outport = %s; "
4685e523 2388 "inport = \"\"; /* Allow sending out inport. */ "
0bac7164 2389 "next;",
4685e523
JP
2390 lrp_addr_s,
2391 op->lrp_networks.ea_s,
2392 op->json_key);
9975d7be
BP
2393
2394 /* The priority here is calculated to implement longest-prefix-match
2395 * routing. */
c9bdf7bd
JP
2396 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, match,
2397 ds_cstr(&actions));
9975d7be
BP
2398 ds_destroy(&actions);
2399 free(match);
2400}
2401
28dc3fe9
SR
2402static void
2403build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
2404 struct hmap *ports,
2405 const struct nbrec_logical_router_static_route *route)
2406{
4685e523
JP
2407 ovs_be32 prefix, nexthop, mask;
2408 const char *lrp_addr_s;
28dc3fe9
SR
2409
2410 /* Verify that next hop is an IP address with 32 bits mask. */
4685e523 2411 char *error = ip_parse_masked(route->nexthop, &nexthop, &mask);
28dc3fe9
SR
2412 if (error || mask != OVS_BE32_MAX) {
2413 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2414 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
2415 free(error);
2416 return;
2417 }
2418
2419 /* Verify that ip prefix is a valid CIDR address. */
2420 error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
2421 if (error || !ip_is_cidr(mask)) {
2422 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
c9bdf7bd 2423 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
28dc3fe9
SR
2424 route->ip_prefix);
2425 free(error);
2426 return;
2427 }
2428
2429 /* Find the outgoing port. */
2430 struct ovn_port *out_port = NULL;
2431 if (route->output_port) {
2432 out_port = ovn_port_find(ports, route->output_port);
2433 if (!out_port) {
2434 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2435 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
2436 route->output_port, route->ip_prefix);
2437 return;
2438 }
4685e523 2439 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
28dc3fe9
SR
2440 } else {
2441 /* output_port is not specified, find the
2442 * router port matching the next hop. */
2443 int i;
2444 for (i = 0; i < od->nbr->n_ports; i++) {
2445 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
2446 out_port = ovn_port_find(ports, lrp->name);
2447 if (!out_port) {
2448 /* This should not happen. */
2449 continue;
2450 }
2451
4685e523
JP
2452 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
2453 if (lrp_addr_s) {
28dc3fe9
SR
2454 break;
2455 }
2456 }
28dc3fe9
SR
2457 }
2458
4685e523
JP
2459 if (!lrp_addr_s) {
2460 /* There is no matched out port. */
2461 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2462 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
2463 route->ip_prefix, route->nexthop);
2464 return;
2465 }
2466
2467 char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & mask));
2468 add_route(lflows, out_port, lrp_addr_s, prefix_s,
2469 ip_count_cidr_bits(mask), route->nexthop);
c9bdf7bd 2470 free(prefix_s);
28dc3fe9
SR
2471}
2472
4685e523
JP
2473static void
2474op_put_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
2475{
2476 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
2477 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
2478 return;
2479 }
2480
2481 ds_put_cstr(ds, "{");
2482 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2483 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
2484 if (add_bcast) {
2485 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
2486 }
2487 }
2488 ds_chomp(ds, ' ');
2489 ds_chomp(ds, ',');
2490 ds_put_cstr(ds, "}");
2491}
2492
9975d7be
BP
2493static void
2494build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
2495 struct hmap *lflows)
2496{
2497 /* This flow table structure is documented in ovn-northd(8), so please
2498 * update ovn-northd.8.xml if you change anything. */
2499
09b39248
JP
2500 struct ds match = DS_EMPTY_INITIALIZER;
2501 struct ds actions = DS_EMPTY_INITIALIZER;
2502
9975d7be
BP
2503 /* Logical router ingress table 0: Admission control framework. */
2504 struct ovn_datapath *od;
2505 HMAP_FOR_EACH (od, key_node, datapaths) {
2506 if (!od->nbr) {
2507 continue;
2508 }
2509
2510 /* Logical VLANs not supported.
2511 * Broadcast/multicast source address is invalid. */
2512 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
2513 "vlan.present || eth.src[40]", "drop;");
2514 }
2515
2516 /* Logical router ingress table 0: match (priority 50). */
2517 struct ovn_port *op;
2518 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2519 if (!op->nbrp) {
9975d7be
BP
2520 continue;
2521 }
2522
0ee00741 2523 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
2524 /* Drop packets from disabled logical ports (since logical flow
2525 * tables are default-drop). */
2526 continue;
2527 }
2528
09b39248 2529 ds_clear(&match);
4685e523
JP
2530 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
2531 op->lrp_networks.ea_s, op->json_key);
9975d7be 2532 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 2533 ds_cstr(&match), "next;");
9975d7be
BP
2534 }
2535
2536 /* Logical router ingress table 1: IP Input. */
78aab811 2537 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
2538 if (!od->nbr) {
2539 continue;
2540 }
2541
2542 /* L3 admission control: drop multicast and broadcast source, localhost
2543 * source or destination, and zero network source or destination
2544 * (priority 100). */
2545 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
2546 "ip4.mcast || "
2547 "ip4.src == 255.255.255.255 || "
2548 "ip4.src == 127.0.0.0/8 || "
2549 "ip4.dst == 127.0.0.0/8 || "
2550 "ip4.src == 0.0.0.0/8 || "
2551 "ip4.dst == 0.0.0.0/8",
2552 "drop;");
2553
0bac7164
BP
2554 /* ARP reply handling. Use ARP replies to populate the logical
2555 * router's ARP table. */
2556 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
2557 "put_arp(inport, arp.spa, arp.sha);");
2558
9975d7be
BP
2559 /* Drop Ethernet local broadcast. By definition this traffic should
2560 * not be forwarded.*/
2561 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2562 "eth.bcast", "drop;");
2563
9975d7be
BP
2564 /* TTL discard.
2565 *
2566 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
2567 ds_clear(&match);
2568 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
2569 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
2570 ds_cstr(&match), "drop;");
9975d7be
BP
2571
2572 /* Pass other traffic not already handled to the next table for
2573 * routing. */
2574 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
2575 }
2576
9975d7be 2577 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2578 if (!op->nbrp) {
9975d7be
BP
2579 continue;
2580 }
2581
2582 /* L3 admission control: drop packets that originate from an IP address
2583 * owned by the router or a broadcast address known to the router
2584 * (priority 100). */
09b39248 2585 ds_clear(&match);
4685e523
JP
2586 ds_put_cstr(&match, "ip4.src == ");
2587 op_put_networks(&match, op, true);
9975d7be 2588 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
09b39248 2589 ds_cstr(&match), "drop;");
9975d7be 2590
dd7652e6 2591 /* ICMP echo reply. These flows reply to ICMP echo requests
bb3c4568
FF
2592 * received for the router's IP address. Since packets only
2593 * get here as part of the logical router datapath, the inport
2594 * (i.e. the incoming locally attached net) does not matter.
2595 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
09b39248 2596 ds_clear(&match);
4685e523
JP
2597 ds_put_cstr(&match, "ip4.dst == ");
2598 op_put_networks(&match, op, false);
2599 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
2600
09b39248
JP
2601 ds_clear(&actions);
2602 ds_put_format(&actions,
4685e523 2603 "ip4.dst <-> ip4.src; "
dd7652e6
JP
2604 "ip.ttl = 255; "
2605 "icmp4.type = 0; "
2606 "inport = \"\"; /* Allow sending out inport. */ "
4685e523 2607 "next; ");
dd7652e6 2608 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 2609 ds_cstr(&match), ds_cstr(&actions));
dd7652e6 2610
9975d7be
BP
2611 /* ARP reply. These flows reply to ARP requests for the router's own
2612 * IP address. */
4685e523
JP
2613 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2614 ds_clear(&match);
2615 ds_put_format(&match,
2616 "inport == %s && arp.tpa == %s && arp.op == 1",
2617 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
2618
2619 ds_clear(&actions);
2620 ds_put_format(&actions,
2621 "eth.dst = eth.src; "
2622 "eth.src = %s; "
2623 "arp.op = 2; /* ARP reply */ "
2624 "arp.tha = arp.sha; "
2625 "arp.sha = %s; "
2626 "arp.tpa = arp.spa; "
2627 "arp.spa = %s; "
2628 "outport = %s; "
2629 "inport = \"\"; /* Allow sending out inport. */ "
2630 "output;",
2631 op->lrp_networks.ea_s,
2632 op->lrp_networks.ea_s,
2633 op->lrp_networks.ipv4_addrs[i].addr_s,
2634 op->json_key);
2635 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2636 ds_cstr(&match), ds_cstr(&actions));
2637 }
9975d7be 2638
dde5ea7b
GS
2639 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
2640 size_t n_snat_ips = 0;
de297547
GS
2641 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2642 const struct nbrec_nat *nat;
2643
2644 nat = op->od->nbr->nat[i];
2645
de297547
GS
2646 ovs_be32 ip;
2647 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2648 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
dde5ea7b 2649 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
de297547
GS
2650 "for router %s", nat->external_ip, op->key);
2651 continue;
2652 }
2653
dde5ea7b
GS
2654 if (!strcmp(nat->type, "snat")) {
2655 snat_ips[n_snat_ips++] = ip;
2656 continue;
2657 }
2658
2659 /* ARP handling for external IP addresses.
2660 *
2661 * DNAT IP addresses are external IP addresses that need ARP
2662 * handling. */
09b39248
JP
2663 ds_clear(&match);
2664 ds_put_format(&match,
2665 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2666 op->json_key, IP_ARGS(ip));
4685e523 2667
09b39248
JP
2668 ds_clear(&actions);
2669 ds_put_format(&actions,
de297547 2670 "eth.dst = eth.src; "
4685e523 2671 "eth.src = %s; "
de297547
GS
2672 "arp.op = 2; /* ARP reply */ "
2673 "arp.tha = arp.sha; "
4685e523 2674 "arp.sha = %s; "
de297547
GS
2675 "arp.tpa = arp.spa; "
2676 "arp.spa = "IP_FMT"; "
2677 "outport = %s; "
2678 "inport = \"\"; /* Allow sending out inport. */ "
2679 "output;",
4685e523
JP
2680 op->lrp_networks.ea_s,
2681 op->lrp_networks.ea_s,
de297547
GS
2682 IP_ARGS(ip),
2683 op->json_key);
2684 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 2685 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
2686 }
2687
4685e523
JP
2688 ds_clear(&match);
2689 ds_put_cstr(&match, "ip4.dst == {");
2690 bool has_drop_ips = false;
2691 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
dde5ea7b
GS
2692 for (int j = 0; j < n_snat_ips; j++) {
2693 /* Packets to SNAT IPs should not be dropped. */
2694 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
4685e523
JP
2695 continue;
2696 }
4ef48e9d 2697 }
4685e523
JP
2698 ds_put_format(&match, "%s, ",
2699 op->lrp_networks.ipv4_addrs[i].addr_s);
2700 has_drop_ips = true;
4ef48e9d 2701 }
4685e523
JP
2702 ds_chomp(&match, ' ');
2703 ds_chomp(&match, ',');
2704 ds_put_cstr(&match, "}");
4ef48e9d 2705
4685e523
JP
2706 if (has_drop_ips) {
2707 /* Drop IP traffic to this router. */
09b39248
JP
2708 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
2709 ds_cstr(&match), "drop;");
4ef48e9d 2710 }
4685e523 2711
dde5ea7b 2712 free(snat_ips);
9975d7be
BP
2713 }
2714
de297547
GS
2715 /* NAT in Gateway routers. */
2716 HMAP_FOR_EACH (od, key_node, datapaths) {
2717 if (!od->nbr) {
2718 continue;
2719 }
2720
2721 /* Packets are allowed by default. */
2722 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2723 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2724 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2725
2726 /* NAT rules are only valid on Gateway routers. */
2727 if (!smap_get(&od->nbr->options, "chassis")) {
2728 continue;
2729 }
2730
2731 for (int i = 0; i < od->nbr->n_nat; i++) {
2732 const struct nbrec_nat *nat;
2733
2734 nat = od->nbr->nat[i];
2735
2736 ovs_be32 ip, mask;
2737
2738 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2739 if (error || mask != OVS_BE32_MAX) {
2740 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2741 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2742 nat->external_ip);
2743 free(error);
2744 continue;
2745 }
2746
2747 /* Check the validity of nat->logical_ip. 'logical_ip' can
2748 * be a subnet when the type is "snat". */
2749 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2750 if (!strcmp(nat->type, "snat")) {
2751 if (error) {
2752 static struct vlog_rate_limit rl =
2753 VLOG_RATE_LIMIT_INIT(5, 1);
2754 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2755 "in router "UUID_FMT"",
2756 nat->logical_ip, UUID_ARGS(&od->key));
2757 free(error);
2758 continue;
2759 }
2760 } else {
2761 if (error || mask != OVS_BE32_MAX) {
2762 static struct vlog_rate_limit rl =
2763 VLOG_RATE_LIMIT_INIT(5, 1);
2764 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2765 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2766 free(error);
2767 continue;
2768 }
2769 }
2770
de297547
GS
2771 /* Ingress UNSNAT table: It is for already established connections'
2772 * reverse traffic. i.e., SNAT has already been done in egress
2773 * pipeline and now the packet has entered the ingress pipeline as
2774 * part of a reply. We undo the SNAT here.
2775 *
2776 * Undoing SNAT has to happen before DNAT processing. This is
2777 * because when the packet was DNATed in ingress pipeline, it did
2778 * not know about the possibility of eventual additional SNAT in
2779 * egress pipeline. */
2780 if (!strcmp(nat->type, "snat")
2781 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
2782 ds_clear(&match);
2783 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
de297547 2784 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
09b39248 2785 ds_cstr(&match), "ct_snat; next;");
de297547
GS
2786 }
2787
2788 /* Ingress DNAT table: Packets enter the pipeline with destination
2789 * IP address that needs to be DNATted from a external IP address
2790 * to a logical IP address. */
2791 if (!strcmp(nat->type, "dnat")
2792 || !strcmp(nat->type, "dnat_and_snat")) {
2793 /* Packet when it goes from the initiator to destination.
2794 * We need to zero the inport because the router can
2795 * send the packet back through the same interface. */
09b39248
JP
2796 ds_clear(&match);
2797 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2798 ds_clear(&actions);
2799 ds_put_format(&actions,"inport = \"\"; ct_dnat(%s);",
2800 nat->logical_ip);
de297547 2801 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
09b39248 2802 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
2803 }
2804
2805 /* Egress SNAT table: Packets enter the egress pipeline with
2806 * source ip address that needs to be SNATted to a external ip
2807 * address. */
2808 if (!strcmp(nat->type, "snat")
2809 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
2810 ds_clear(&match);
2811 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
2812 ds_clear(&actions);
2813 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
de297547
GS
2814
2815 /* The priority here is calculated such that the
2816 * nat->logical_ip with the longest mask gets a higher
2817 * priority. */
2818 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
09b39248
JP
2819 count_1bits(ntohl(mask)) + 1,
2820 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
2821 }
2822 }
2823
2824 /* Re-circulate every packet through the DNAT zone.
2825 * This helps with two things.
2826 *
2827 * 1. Any packet that needs to be unDNATed in the reverse
2828 * direction gets unDNATed. Ideally this could be done in
2829 * the egress pipeline. But since the gateway router
2830 * does not have any feature that depends on the source
2831 * ip address being external IP address for IP routing,
2832 * we can do it here, saving a future re-circulation.
2833 *
2834 * 2. Any packet that was sent through SNAT zone in the
2835 * previous table automatically gets re-circulated to get
2836 * back the new destination IP address that is needed for
2837 * routing in the openflow pipeline. */
2838 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2839 "ip", "inport = \"\"; ct_dnat;");
2840 }
2841
94300e09 2842 /* Logical router ingress table 4: IP Routing.
9975d7be
BP
2843 *
2844 * A packet that arrives at this table is an IP packet that should be
0bac7164
BP
2845 * routed to the address in ip4.dst. This table sets outport to the correct
2846 * output port, eth.src to the output port's MAC address, and reg0 to the
2847 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
2848 * unchanged), and advances to the next table for ARP resolution. */
9975d7be 2849 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2850 if (!op->nbrp) {
9975d7be
BP
2851 continue;
2852 }
2853
4685e523
JP
2854 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2855 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
2856 op->lrp_networks.ipv4_addrs[i].network_s,
2857 op->lrp_networks.ipv4_addrs[i].plen, NULL);
2858 }
9975d7be 2859 }
4685e523 2860
9975d7be
BP
2861 HMAP_FOR_EACH (od, key_node, datapaths) {
2862 if (!od->nbr) {
2863 continue;
2864 }
2865
28dc3fe9
SR
2866 /* Convert the static routes to flows. */
2867 for (int i = 0; i < od->nbr->n_static_routes; i++) {
2868 const struct nbrec_logical_router_static_route *route;
2869
2870 route = od->nbr->static_routes[i];
2871 build_static_route_flow(lflows, od, ports, route);
2872 }
9975d7be
BP
2873 }
2874 /* XXX destination unreachable */
2875
94300e09 2876 /* Local router ingress table 5: ARP Resolution.
9975d7be
BP
2877 *
2878 * Any packet that reaches this table is an IP packet whose next-hop IP
2879 * address is in reg0. (ip4.dst is the final destination.) This table
2880 * resolves the IP address in reg0 into an output port in outport and an
2881 * Ethernet address in eth.dst. */
2882 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 2883 if (op->nbrp) {
509afdc3
GS
2884 /* This is a logical router port. If next-hop IP address in 'reg0'
2885 * matches ip address of this router port, then the packet is
2886 * intended to eventually be sent to this logical port. Set the
2887 * destination mac address using this port's mac address.
2888 *
2889 * The packet is still in peer's logical pipeline. So the match
2890 * should be on peer's outport. */
cd150899 2891 if (op->peer && op->peer->nbrp) {
09b39248 2892 ds_clear(&match);
4685e523 2893 ds_put_format(&match, "outport == %s && reg0 == ",
cd150899 2894 op->peer->json_key);
4685e523
JP
2895 op_put_networks(&match, op, false);
2896
09b39248 2897 ds_clear(&actions);
4685e523
JP
2898 ds_put_format(&actions, "eth.dst = %s; next;",
2899 op->lrp_networks.ea_s);
cd150899 2900 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
09b39248 2901 100, ds_cstr(&match), ds_cstr(&actions));
509afdc3 2902 }
0ee00741 2903 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
2904 /* This is a logical switch port that backs a VM or a container.
2905 * Extract its addresses. For each of the address, go through all
2906 * the router ports attached to the switch (to which this port
2907 * connects) and if the address in question is reachable from the
2908 * router port, add an ARP entry in that router's pipeline. */
2909
e93b43d6 2910 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 2911 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 2912 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 2913 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 2914 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
2915 /* Get the Logical_Router_Port that the
2916 * Logical_Switch_Port is connected to, as
2917 * 'peer'. */
86e98048 2918 const char *peer_name = smap_get(
0ee00741 2919 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
2920 "router-port");
2921 if (!peer_name) {
2922 continue;
2923 }
2924
e93b43d6 2925 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 2926 if (!peer || !peer->nbrp) {
86e98048
BP
2927 continue;
2928 }
2929
4685e523 2930 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
2931 continue;
2932 }
2933
09b39248 2934 ds_clear(&match);
e93b43d6 2935 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
2936 peer->json_key, ip_s);
2937
09b39248 2938 ds_clear(&actions);
4685e523 2939 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 2940 ovn_lflow_add(lflows, peer->od,
09b39248
JP
2941 S_ROUTER_IN_ARP_RESOLVE, 100,
2942 ds_cstr(&match), ds_cstr(&actions));
86e98048 2943 }
9975d7be
BP
2944 }
2945 }
0ee00741 2946 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
2947 /* This is a logical switch port that connects to a router. */
2948
2949 /* The peer of this switch port is the router port for which
2950 * we need to add logical flows such that it can resolve
2951 * ARP entries for all the other router ports connected to
2952 * the switch in question. */
2953
0ee00741 2954 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
2955 "router-port");
2956 if (!peer_name) {
2957 continue;
2958 }
2959
2960 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 2961 if (!peer || !peer->nbrp) {
75cf9d2b
GS
2962 continue;
2963 }
2964
4685e523 2965 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 2966 const char *router_port_name = smap_get(
0ee00741 2967 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
2968 "router-port");
2969 struct ovn_port *router_port = ovn_port_find(ports,
2970 router_port_name);
0ee00741 2971 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
2972 continue;
2973 }
2974
2975 /* Skip the router port under consideration. */
2976 if (router_port == peer) {
2977 continue;
2978 }
2979
09b39248 2980 ds_clear(&match);
4685e523
JP
2981 ds_put_format(&match, "outport == %s && reg0 == ",
2982 peer->json_key);
2983 op_put_networks(&match, router_port, false);
2984
09b39248 2985 ds_clear(&actions);
4685e523
JP
2986 ds_put_format(&actions, "eth.dst = %s; next;",
2987 router_port->lrp_networks.ea_s);
75cf9d2b 2988 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
09b39248 2989 100, ds_cstr(&match), ds_cstr(&actions));
75cf9d2b 2990 }
9975d7be
BP
2991 }
2992 }
75cf9d2b 2993
0bac7164
BP
2994 HMAP_FOR_EACH (od, key_node, datapaths) {
2995 if (!od->nbr) {
2996 continue;
2997 }
2998
2999 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
3000 "get_arp(outport, reg0); next;");
3001 }
3002
94300e09 3003 /* Local router ingress table 6: ARP request.
0bac7164
BP
3004 *
3005 * In the common case where the Ethernet destination has been resolved,
94300e09
JP
3006 * this table outputs the packet (priority 0). Otherwise, it composes
3007 * and sends an ARP request (priority 100). */
0bac7164
BP
3008 HMAP_FOR_EACH (od, key_node, datapaths) {
3009 if (!od->nbr) {
3010 continue;
3011 }
3012
3013 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
3014 "eth.dst == 00:00:00:00:00:00",
3015 "arp { "
3016 "eth.dst = ff:ff:ff:ff:ff:ff; "
3017 "arp.spa = reg1; "
3018 "arp.op = 1; " /* ARP request */
3019 "output; "
3020 "};");
3021 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
3022 }
9975d7be 3023
de297547 3024 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
3025 *
3026 * Priority 100 rules deliver packets to enabled logical ports. */
3027 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3028 if (!op->nbrp) {
9975d7be
BP
3029 continue;
3030 }
3031
0ee00741 3032 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
3033 /* Drop packets to disabled logical ports (since logical flow
3034 * tables are default-drop). */
3035 continue;
3036 }
3037
09b39248
JP
3038 ds_clear(&match);
3039 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 3040 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 3041 ds_cstr(&match), "output;");
9975d7be 3042 }
09b39248
JP
3043
3044 ds_destroy(&match);
3045 ds_destroy(&actions);
9975d7be
BP
3046}
3047
/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
 * constructing their contents based on the OVN_NB database. */
static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             struct hmap *ports)
{
    /* Desired state, computed from the northbound contents. */
    struct hmap lflows = HMAP_INITIALIZER(&lflows);
    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);

    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
    build_lrouter_flows(datapaths, ports, &lflows);

    /* Push changes to the Logical_Flow table to database.
     *
     * First pass: walk every existing southbound row.  A row whose
     * datapath no longer exists, or that has no matching desired flow,
     * is deleted.  A row that matches a desired flow is kept, and the
     * desired flow is removed from 'lflows' so the second pass does not
     * re-insert it. */
    const struct sbrec_logical_flow *sbflow, *next_sbflow;
    SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
        struct ovn_datapath *od
            = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
        if (!od) {
            sbrec_logical_flow_delete(sbflow);
            continue;
        }

        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
        enum ovn_pipeline pipeline
            = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
        struct ovn_lflow *lflow = ovn_lflow_find(
            &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
            sbflow->priority, sbflow->match, sbflow->actions);
        if (lflow) {
            /* Row already matches the desired flow; drop the desired copy
             * so only genuinely new flows remain in 'lflows'. */
            ovn_lflow_destroy(&lflows, lflow);
        } else {
            sbrec_logical_flow_delete(sbflow);
        }
    }

    /* Second pass: everything still in 'lflows' is new; insert it. */
    struct ovn_lflow *lflow, *next_lflow;
    HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
        enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
        uint8_t table = ovn_stage_get_table(lflow->stage);

        sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
        sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
        sbrec_logical_flow_set_pipeline(
            sbflow, pipeline == P_IN ? "ingress" : "egress");
        sbrec_logical_flow_set_table_id(sbflow, table);
        sbrec_logical_flow_set_priority(sbflow, lflow->priority);
        sbrec_logical_flow_set_match(sbflow, lflow->match);
        sbrec_logical_flow_set_actions(sbflow, lflow->actions);

        /* Human-readable stage name, for debugging only. */
        const struct smap ids = SMAP_CONST1(&ids, "stage-name",
                                            ovn_stage_to_str(lflow->stage));
        sbrec_logical_flow_set_external_ids(sbflow, &ids);

        ovn_lflow_destroy(&lflows, lflow);
    }
    hmap_destroy(&lflows);

    /* Push changes to the Multicast_Group table to database.
     * Same two-pass reconcile as for logical flows above. */
    const struct sbrec_multicast_group *sbmc, *next_sbmc;
    SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
        struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
                                                          sbmc->datapath);
        if (!od) {
            sbrec_multicast_group_delete(sbmc);
            continue;
        }

        struct multicast_group group = { .name = sbmc->name,
                                         .key = sbmc->tunnel_key };
        struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
        if (mc) {
            /* Keep the row but refresh its port set. */
            ovn_multicast_update_sbrec(mc, sbmc);
            ovn_multicast_destroy(&mcgroups, mc);
        } else {
            sbrec_multicast_group_delete(sbmc);
        }
    }
    struct ovn_multicast *mc, *next_mc;
    HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
        sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
        sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
        sbrec_multicast_group_set_name(sbmc, mc->group->name);
        sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
        ovn_multicast_update_sbrec(mc, sbmc);
        ovn_multicast_destroy(&mcgroups, mc);
    }
    hmap_destroy(&mcgroups);
}
ea382567
RB
3135
3136/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
3137 * We always update OVN_Southbound to match the current data in
3138 * OVN_Northbound, so that the address sets used in Logical_Flows in
3139 * OVN_Southbound is checked against the proper set.*/
3140static void
3141sync_address_sets(struct northd_context *ctx)
3142{
3143 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
3144
3145 const struct sbrec_address_set *sb_address_set;
3146 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
3147 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
3148 }
3149
3150 const struct nbrec_address_set *nb_address_set;
3151 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
3152 sb_address_set = shash_find_and_delete(&sb_address_sets,
3153 nb_address_set->name);
3154 if (!sb_address_set) {
3155 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
3156 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
3157 }
3158
3159 sbrec_address_set_set_addresses(sb_address_set,
3160 /* "char **" is not compatible with "const char **" */
3161 (const char **) nb_address_set->addresses,
3162 nb_address_set->n_addresses);
3163 }
3164
3165 struct shash_node *node, *next;
3166 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
3167 sbrec_address_set_delete(node->data);
3168 shash_delete(&sb_address_sets, node);
3169 }
3170 shash_destroy(&sb_address_sets);
3171}
5868eb24 3172\f
4edcdcf4 3173static void
331e7aef 3174ovnnb_db_run(struct northd_context *ctx)
4edcdcf4 3175{
331e7aef
NS
3176 if (!ctx->ovnsb_txn) {
3177 return;
3178 }
5868eb24
BP
3179 struct hmap datapaths, ports;
3180 build_datapaths(ctx, &datapaths);
3181 build_ports(ctx, &datapaths, &ports);
3182 build_lflows(ctx, &datapaths, &ports);
3183
ea382567
RB
3184 sync_address_sets(ctx);
3185
5868eb24
BP
3186 struct ovn_datapath *dp, *next_dp;
3187 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
3188 ovn_datapath_destroy(&datapaths, dp);
3189 }
3190 hmap_destroy(&datapaths);
3191
3192 struct ovn_port *port, *next_port;
3193 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
3194 ovn_port_destroy(&ports, port);
3195 }
3196 hmap_destroy(&ports);
ac0630a2
RB
3197}
3198
f93818dd
RB
/*
 * The only change we get notified about is if the 'chassis' column of the
 * 'Port_Binding' table changes.  When this column is not empty, it means we
 * need to set the corresponding logical port as 'up' in the northbound DB.
 */
static void
ovnsb_db_run(struct northd_context *ctx)
{
    /* Without a northbound transaction there is nowhere to write. */
    if (!ctx->ovnnb_txn) {
        return;
    }
    struct hmap lports_hmap;
    const struct sbrec_port_binding *sb;
    const struct nbrec_logical_switch_port *nbsp;

    /* Temporary index: northbound logical switch ports hashed by name,
     * so each Port_Binding row can be matched without a linear scan. */
    struct lport_hash_node {
        struct hmap_node node;
        const struct nbrec_logical_switch_port *nbsp;
    } *hash_node;

    hmap_init(&lports_hmap);

    NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
        hash_node = xzalloc(sizeof *hash_node);
        hash_node->nbsp = nbsp;
        hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
    }

    SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
        /* Find the northbound port with the same name as this binding. */
        nbsp = NULL;
        HMAP_FOR_EACH_WITH_HASH(hash_node, node,
                                hash_string(sb->logical_port, 0),
                                &lports_hmap) {
            if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
                nbsp = hash_node->nbsp;
                break;
            }
        }

        if (!nbsp) {
            /* The logical port doesn't exist for this port binding.  This can
             * happen under normal circumstances when ovn-northd hasn't gotten
             * around to pruning the Port_Binding yet. */
            continue;
        }

        /* Mirror the binding's chassis state into nbsp->up, writing only
         * when the stored value (absent counts as "down") would change. */
        if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
            bool up = true;
            nbrec_logical_switch_port_set_up(nbsp, &up, 1);
        } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
            bool up = false;
            nbrec_logical_switch_port_set_up(nbsp, &up, 1);
        }
    }

    /* Free the temporary index. */
    HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
        free(hash_node);
    }
    hmap_destroy(&lports_hmap);
}
3259\f
45f98d4c 3260
281977f7
NS
/* DHCPv4 options that ovn-northd supports.  Each entry is a macro from
 * ovn/lib/ovn-dhcp.h expanding to a dhcp_opts_map initializer;
 * check_and_add_supported_dhcp_opts_to_sb_db() syncs this table into the
 * southbound DHCP_Options table. */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
3287
3288static void
3289check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
3290{
3291 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
3292 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
3293 sizeof(supported_dhcp_opts[0])); i++) {
3294 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
3295 dhcp_opt_hash(supported_dhcp_opts[i].name));
3296 }
3297
3298 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
3299 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
3300 struct dhcp_opts_map *dhcp_opt =
3301 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
3302 if (dhcp_opt) {
3303 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
3304 } else {
3305 sbrec_dhcp_options_delete(opt_row);
3306 }
3307 }
3308
3309 struct dhcp_opts_map *opt;
3310 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
3311 struct sbrec_dhcp_options *sbrec_dhcp_option =
3312 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
3313 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
3314 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
3315 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
3316 }
3317
3318 hmap_destroy(&dhcp_opts_to_add);
3319}
3320
/* Lazily-built default northbound DB address; freed by main() at exit. */
static char *default_nb_db_;

/* Returns "unix:<rundir>/ovnnb_db.sock", computing and caching it on the
 * first call. */
static const char *
default_nb_db(void)
{
    if (default_nb_db_) {
        return default_nb_db_;
    }
    default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
    return default_nb_db_;
}
3331
/* Lazily-built default southbound DB address; freed by main() at exit. */
static char *default_sb_db_;

/* Returns "unix:<rundir>/ovnsb_db.sock", computing and caching it on the
 * first call. */
static const char *
default_sb_db(void)
{
    if (default_sb_db_) {
        return default_sb_db_;
    }
    default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
    return default_sb_db_;
}
3342
3343static void
3344parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
3345{
3346 enum {
67d9b930 3347 DAEMON_OPTION_ENUMS,
ac0630a2
RB
3348 VLOG_OPTION_ENUMS,
3349 };
3350 static const struct option long_options[] = {
ec78987f 3351 {"ovnsb-db", required_argument, NULL, 'd'},
ac0630a2
RB
3352 {"ovnnb-db", required_argument, NULL, 'D'},
3353 {"help", no_argument, NULL, 'h'},
3354 {"options", no_argument, NULL, 'o'},
3355 {"version", no_argument, NULL, 'V'},
67d9b930 3356 DAEMON_LONG_OPTIONS,
ac0630a2
RB
3357 VLOG_LONG_OPTIONS,
3358 STREAM_SSL_LONG_OPTIONS,
3359 {NULL, 0, NULL, 0},
3360 };
3361 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
3362
3363 for (;;) {
3364 int c;
3365
3366 c = getopt_long(argc, argv, short_options, long_options, NULL);
3367 if (c == -1) {
3368 break;
3369 }
3370
3371 switch (c) {
67d9b930 3372 DAEMON_OPTION_HANDLERS;
ac0630a2
RB
3373 VLOG_OPTION_HANDLERS;
3374 STREAM_SSL_OPTION_HANDLERS;
3375
3376 case 'd':
ec78987f 3377 ovnsb_db = optarg;
ac0630a2
RB
3378 break;
3379
3380 case 'D':
3381 ovnnb_db = optarg;
3382 break;
3383
3384 case 'h':
3385 usage();
3386 exit(EXIT_SUCCESS);
3387
3388 case 'o':
3389 ovs_cmdl_print_options(long_options);
3390 exit(EXIT_SUCCESS);
3391
3392 case 'V':
3393 ovs_print_version(0, 0);
3394 exit(EXIT_SUCCESS);
3395
3396 default:
3397 break;
3398 }
3399 }
3400
ec78987f 3401 if (!ovnsb_db) {
60bdd011 3402 ovnsb_db = default_sb_db();
ac0630a2
RB
3403 }
3404
3405 if (!ovnnb_db) {
60bdd011 3406 ovnnb_db = default_nb_db();
ac0630a2
RB
3407 }
3408
3409 free(short_options);
3410}
3411
5868eb24
BP
/* Registers 'column' for replication in 'idl' but suppresses change
 * alerts for it: ovn-northd writes these columns itself, so waking up on
 * its own updates would cause needless recomputation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
3419
ac0630a2
RB
/* Entry point: daemonizes, connects to the northbound and southbound
 * OVSDB databases, and loops reconciling one into the other until asked
 * to exit (via the "exit" unixctl command or service stop). */
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    /* "ovs-appctl -t ovn-northd exit" flips 'exiting' below. */
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    nbrec_init();
    sbrec_init();

    /* We want to detect all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));

    /* For the southbound DB, monitor only the tables/columns registered
     * below; columns northd itself writes are added without alerts. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    /* 'chassis' is written by ovn-controller, so alerts stay enabled:
     * ovnsb_db_run() reacts to its changes. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* ovsdb_idl_loop_run() returns NULL when no transaction can be
         * started this iteration; the *_db_run() functions check for
         * that before writing. */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx);
        ovnsb_db_run(&ctx);
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    /* Free the cached default DB addresses (may be NULL; free(NULL) is a
     * no-op). */
    free(default_nb_db_);
    free(default_sb_db_);
    exit(res);
}
7b303ff9
AW
3537
3538static void
3539ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
3540 const char *argv[] OVS_UNUSED, void *exiting_)
3541{
3542 bool *exiting = exiting_;
3543 *exiting = true;
3544
3545 unixctl_command_reply(conn, NULL);
3546}