]> git.proxmox.com Git - mirror_ovs.git/blame - ovn/northd/ovn-northd.c
ovn-northd: support IPAM with externally specified MAC
[mirror_ovs.git] / ovn / northd / ovn-northd.c
CommitLineData
ac0630a2
RB
1/*
2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
5 *
6 * http://www.apache.org/licenses/LICENSE-2.0
7 *
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
13 */
14
15#include <config.h>
16
17#include <getopt.h>
18#include <stdlib.h>
19#include <stdio.h>
20
b511690b 21#include "bitmap.h"
ac0630a2 22#include "command-line.h"
67d9b930 23#include "daemon.h"
ac0630a2 24#include "dirs.h"
3e8a2ad1 25#include "openvswitch/dynamic-string.h"
ac0630a2 26#include "fatal-signal.h"
4edcdcf4 27#include "hash.h"
ee89ea7b
TW
28#include "openvswitch/hmap.h"
29#include "openvswitch/json.h"
8b2ed684 30#include "ovn/lex.h"
281977f7 31#include "ovn/lib/ovn-dhcp.h"
e3df8838
BP
32#include "ovn/lib/ovn-nb-idl.h"
33#include "ovn/lib/ovn-sb-idl.h"
218351dd 34#include "ovn/lib/ovn-util.h"
a6095f81 35#include "ovn/actions.h"
064d7f84 36#include "packets.h"
ac0630a2 37#include "poll-loop.h"
5868eb24 38#include "smap.h"
7a15be69 39#include "sset.h"
ac0630a2
RB
40#include "stream.h"
41#include "stream-ssl.h"
7b303ff9 42#include "unixctl.h"
ac0630a2 43#include "util.h"
4edcdcf4 44#include "uuid.h"
ac0630a2
RB
45#include "openvswitch/vlog.h"
46
2e2762d4 47VLOG_DEFINE_THIS_MODULE(ovn_northd);
ac0630a2 48
7b303ff9
AW
49static unixctl_cb_func ovn_northd_exit;
50
/* Connection state for the two OVSDB databases that ovn-northd bridges. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;     /* OVN Northbound DB session. */
    struct ovsdb_idl *ovnsb_idl;     /* OVN Southbound DB session. */
    struct ovsdb_idl_txn *ovnnb_txn; /* Current northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn; /* Current southbound transaction. */
};
57
ac0630a2 58static const char *ovnnb_db;
ec78987f 59static const char *ovnsb_db;
ac0630a2 60
8639f9be
ND
61#define MAC_ADDR_PREFIX 0x0A0000000000ULL
62#define MAC_ADDR_SPACE 0xffffff
63
64/* MAC address management (macam) table of "struct eth_addr"s, that holds the
65 * MAC addresses allocated by the OVN ipam module. */
66static struct hmap macam = HMAP_INITIALIZER(&macam);
b511690b
GS
67
68#define MAX_OVN_TAGS 4096
880fcd14
BP
69\f
70/* Pipeline stages. */
ac0630a2 71
880fcd14
BP
72/* The two pipelines in an OVN logical flow table. */
73enum ovn_pipeline {
74 P_IN, /* Ingress pipeline. */
75 P_OUT /* Egress pipeline. */
76};
091e3af9 77
880fcd14
BP
78/* The two purposes for which ovn-northd uses OVN logical datapaths. */
79enum ovn_datapath_type {
80 DP_SWITCH, /* OVN logical switch. */
81 DP_ROUTER /* OVN logical router. */
091e3af9
JP
82};
83
880fcd14
BP
84/* Returns an "enum ovn_stage" built from the arguments.
85 *
86 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
87 * functions can't be used in enums or switch cases.) */
88#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
89 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
90
91/* A stage within an OVN logical switch or router.
091e3af9 92 *
880fcd14
BP
93 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
94 * or router, whether the stage is part of the ingress or egress pipeline, and
95 * the table within that pipeline. The first three components are combined to
685f4dfe 96 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
880fcd14
BP
97 * S_ROUTER_OUT_DELIVERY. */
98enum ovn_stage {
1a03fc7d
BS
99#define PIPELINE_STAGES \
100 /* Logical switch ingress stages. */ \
101 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
102 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
103 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
104 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
105 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
106 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
107 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
108 PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
109 PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \
110 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \
111 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \
112 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \
113 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
114 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 13, "ls_in_l2_lkup") \
e0c9e58b
JP
115 \
116 /* Logical switch egress stages. */ \
7a15be69
GS
117 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
118 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
119 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
120 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
121 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
1a03fc7d
BS
122 PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
123 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \
124 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \
125 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \
e0c9e58b
JP
126 \
127 /* Logical router ingress stages. */ \
128 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
129 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
cc4583aa
GS
130 PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
131 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
132 PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
133 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 5, "lr_in_ip_routing") \
134 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 6, "lr_in_arp_resolve") \
135 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 7, "lr_in_arp_request") \
e0c9e58b
JP
136 \
137 /* Logical router egress stages. */ \
de297547
GS
138 PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
139 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
880fcd14
BP
140
141#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
142 S_##DP_TYPE##_##PIPELINE##_##STAGE \
143 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
144 PIPELINE_STAGES
145#undef PIPELINE_STAGE
091e3af9
JP
146};
147
6bb4a18e
JP
148/* Due to various hard-coded priorities need to implement ACLs, the
149 * northbound database supports a smaller range of ACL priorities than
150 * are available to logical flows. This value is added to an ACL
151 * priority to determine the ACL's logical flow priority. */
152#define OVN_ACL_PRI_OFFSET 1000
153
facf8652 154#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
fa313a8c 155#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
7a15be69 156#define REGBIT_CONNTRACK_NAT "reg0[2]"
281977f7 157#define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
facf8652 158
880fcd14
BP
159/* Returns an "enum ovn_stage" built from the arguments. */
160static enum ovn_stage
161ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
162 uint8_t table)
163{
164 return OVN_STAGE_BUILD(dp_type, pipeline, table);
165}
166
167/* Returns the pipeline to which 'stage' belongs. */
168static enum ovn_pipeline
169ovn_stage_get_pipeline(enum ovn_stage stage)
170{
171 return (stage >> 8) & 1;
172}
173
174/* Returns the table to which 'stage' belongs. */
175static uint8_t
176ovn_stage_get_table(enum ovn_stage stage)
177{
178 return stage & 0xff;
179}
180
/* Returns a string name for 'stage'.
 *
 * The case arms are generated by expanding the PIPELINE_STAGES X-macro, so
 * every defined stage maps to its logical flow table name; values outside
 * the enum fall through to "<unknown>". */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
9a9961d2
BP
193
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added.
 *
 * Generated from PIPELINE_STAGES like ovn_stage_to_str(); an unknown stage
 * value is a programming error, hence OVS_NOT_REACHED(). */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
    default: OVS_NOT_REACHED();
    }
}
880fcd14 207\f
ac0630a2
RB
/* Prints ovn-northd's command-line usage, followed by the usage of the
 * daemon, vlog, and stream/database option groups, to stdout. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
228\f
5868eb24
BP
/* One allocated tunnel ID within a set of in-use tunnel keys. */
struct tnlid_node {
    struct hmap_node hmap_node; /* In a "struct hmap" of in-use IDs. */
    uint32_t tnlid;             /* The allocated tunnel key. */
};
233
234static void
235destroy_tnlids(struct hmap *tnlids)
4edcdcf4 236{
4ec3d7c7
DDP
237 struct tnlid_node *node;
238 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
5868eb24
BP
239 free(node);
240 }
241 hmap_destroy(tnlids);
242}
243
244static void
245add_tnlid(struct hmap *set, uint32_t tnlid)
246{
247 struct tnlid_node *node = xmalloc(sizeof *node);
248 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
249 node->tnlid = tnlid;
4edcdcf4
RB
250}
251
4edcdcf4 252static bool
5868eb24 253tnlid_in_use(const struct hmap *set, uint32_t tnlid)
4edcdcf4 254{
5868eb24
BP
255 const struct tnlid_node *node;
256 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
257 if (node->tnlid == tnlid) {
258 return true;
259 }
260 }
261 return false;
262}
4edcdcf4 263
5868eb24
BP
264static uint32_t
265allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
266 uint32_t *hint)
267{
268 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
269 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
270 if (!tnlid_in_use(set, tnlid)) {
271 add_tnlid(set, tnlid);
272 *hint = tnlid;
273 return tnlid;
274 }
4edcdcf4
RB
275 }
276
5868eb24
BP
277 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
278 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
279 return 0;
280}
281\f
a6095f81
BS
/* One qdisc queue ID allocated on a particular chassis. */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* Hashed on 'chassis_uuid'. */
    uint32_t queue_id;          /* Allocated queue ID. */
    struct uuid chassis_uuid;   /* Chassis that owns the queue. */
};
287
288static void
289destroy_chassis_queues(struct hmap *set)
290{
291 struct ovn_chassis_qdisc_queues *node;
292 HMAP_FOR_EACH_POP (node, key_node, set) {
293 free(node);
294 }
295 hmap_destroy(set);
296}
297
298static void
299add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid,
300 uint32_t queue_id)
301{
302 struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node);
303 node->queue_id = queue_id;
304 memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid);
305 hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid));
306}
307
308static bool
309chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid,
310 uint32_t queue_id)
311{
312 const struct ovn_chassis_qdisc_queues *node;
313 HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) {
314 if (uuid_equals(chassis_uuid, &node->chassis_uuid)
315 && node->queue_id == queue_id) {
316 return true;
317 }
318 }
319 return false;
320}
321
/* Allocates the lowest free qdisc queue ID on 'chassis', in the range
 * (QDISC_MIN_QUEUE_ID, QDISC_MAX_QUEUE_ID], records it in 'set', and returns
 * it.  Returns 0 and logs a warning if every queue ID is already in use. */
static uint32_t
allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
{
    for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
         queue_id <= QDISC_MAX_QUEUE_ID;
         queue_id++) {
        if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
            add_chassis_queue(set, &chassis->header_.uuid, queue_id);
            return queue_id;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
    return 0;
}
338
339static void
340free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis,
341 uint32_t queue_id)
342{
343 struct ovn_chassis_qdisc_queues *node;
344 HMAP_FOR_EACH_WITH_HASH (node, key_node,
345 uuid_hash(&chassis->header_.uuid),
346 set) {
347 if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid)
348 && node->queue_id == queue_id) {
349 hmap_remove(set, &node->key_node);
350 break;
351 }
352 }
353}
354
/* Returns true if 'opts' configures any QoS rate limiting for a port. */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    if (smap_get(opts, "qos_max_rate")) {
        return true;
    }
    return smap_get(opts, "qos_burst") != NULL;
}
361\f
9975d7be
BP
/* One logical datapath (logical switch or logical router).
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* NOTE(review): presumably the switch's
                                     * ports of type "router" -- populated
                                     * elsewhere in this file; confirm. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;     /* Where to resume the tunnel-key search. */

    bool has_unknown;           /* NOTE(review): presumably set when a port
                                 * has an "unknown" address; set elsewhere. */

    /* IPAM data. */
    struct hmap ipam;           /* Allocated IPv4 addresses (ipam_node). */
};
386
/* One MAC address allocated from the global "macam" table. */
struct macam_node {
    struct hmap_node hmap_node; /* In "macam"; hashed on the 64-bit MAC. */
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};
391
8639f9be
ND
392static void
393cleanup_macam(struct hmap *macam)
394{
395 struct macam_node *node;
396 HMAP_FOR_EACH_POP (node, hmap_node, macam) {
397 free(node);
398 }
399}
400
/* One IPv4 address allocated within a datapath's subnet. */
struct ipam_node {
    struct hmap_node hmap_node; /* In an ovn_datapath's 'ipam' map. */
    uint32_t ip_addr;           /* Allocated IP address (host byte order). */
};
405
406static void
407destroy_ipam(struct hmap *ipam)
408{
409 struct ipam_node *node;
410 HMAP_FOR_EACH_POP (node, hmap_node, ipam) {
411 free(node);
412 }
413 hmap_destroy(ipam);
414}
415
5868eb24
BP
/* Creates a new ovn_datapath for 'key', bound to northbound switch 'nbs',
 * northbound router 'nbr', and southbound binding 'sb' (each may be NULL),
 * and indexes it into 'datapaths' by UUID.  The tunnel-ID and IPAM maps
 * start empty; the struct is zero-initialized otherwise. */
static struct ovn_datapath *
ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
                    const struct nbrec_logical_switch *nbs,
                    const struct nbrec_logical_router *nbr,
                    const struct sbrec_datapath_binding *sb)
{
    struct ovn_datapath *od = xzalloc(sizeof *od);
    od->key = *key;
    od->sb = sb;
    od->nbs = nbs;
    od->nbr = nbr;
    hmap_init(&od->port_tnlids);
    hmap_init(&od->ipam);
    od->port_key_hint = 0;
    hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
    return od;
}
433
/* Removes 'od' from 'datapaths' and frees it together with its tunnel-ID
 * and IPAM maps and its router-port array.  A NULL 'od' is ignored. */
static void
ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
{
    if (od) {
        /* Don't remove od->list. It is used within build_datapaths() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(datapaths, &od->key_node);
        destroy_tnlids(&od->port_tnlids);
        destroy_ipam(&od->ipam);
        free(od->router_ports);
        free(od);
    }
}
448
9a9961d2
BP
449/* Returns 'od''s datapath type. */
450static enum ovn_datapath_type
451ovn_datapath_get_type(const struct ovn_datapath *od)
452{
453 return od->nbs ? DP_SWITCH : DP_ROUTER;
454}
455
5868eb24
BP
456static struct ovn_datapath *
457ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
458{
459 struct ovn_datapath *od;
460
461 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
462 if (uuid_equals(uuid, &od->key)) {
463 return od;
464 }
465 }
466 return NULL;
467}
468
469static struct ovn_datapath *
470ovn_datapath_from_sbrec(struct hmap *datapaths,
471 const struct sbrec_datapath_binding *sb)
472{
473 struct uuid key;
474
9975d7be
BP
475 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
476 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
5868eb24
BP
477 return NULL;
478 }
479 return ovn_datapath_find(datapaths, &key);
480}
481
5412db30
J
482static bool
483lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
484{
485 return !lrouter->enabled || *lrouter->enabled;
486}
487
5868eb24
BP
/* Pairs southbound Datapath_Binding records with northbound logical switches
 * and routers, filling 'datapaths' with one ovn_datapath per logical
 * datapath.  On return each datapath is on exactly one of three lists:
 *
 *   - 'sb_only': has a southbound record but no northbound counterpart
 *     (stale; the caller deletes these).
 *   - 'nb_only': northbound switch/router with no southbound record yet
 *     (the caller creates bindings for these).
 *   - 'both': present on both sides.
 *
 * Southbound records that lack the identifying external-ids key, or that
 * duplicate another record's key, are deleted here immediately. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start from the southbound side: every binding begins life on
     * 'sb_only' and moves to 'both' if a northbound match appears below. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            /* No way to tell what this binding corresponds to: drop it. */
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            /* Duplicate key: keep the first binding seen, drop this one. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Match northbound logical switches against the datapaths above. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }

    /* Likewise for northbound logical routers.  Disabled routers are
     * skipped entirely, so any southbound state they have stays on
     * 'sb_only' and gets cleaned up by the caller. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen! */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
571
/* Picks an unused 24-bit tunnel key for a new datapath. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;
    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
578
0bac7164
BP
579/* Updates the southbound Datapath_Binding table so that it contains the
580 * logical switches and routers specified by the northbound database.
581 *
582 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
583 * switch and router. */
5868eb24
BP
584static void
585build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
586{
587 struct ovs_list sb_only, nb_only, both;
588
589 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
590
417e7e66 591 if (!ovs_list_is_empty(&nb_only)) {
5868eb24
BP
592 /* First index the in-use datapath tunnel IDs. */
593 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
594 struct ovn_datapath *od;
595 LIST_FOR_EACH (od, list, &both) {
596 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
597 }
598
599 /* Add southbound record for each unmatched northbound record. */
600 LIST_FOR_EACH (od, list, &nb_only) {
601 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
602 if (!tunnel_key) {
603 break;
604 }
605
606 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
607
0f8e9c12
BP
608 /* Get the logical-switch or logical-router UUID to set in
609 * external-ids. */
5868eb24 610 char uuid_s[UUID_LEN + 1];
9975d7be
BP
611 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
612 const char *key = od->nbs ? "logical-switch" : "logical-router";
0f8e9c12
BP
613
614 /* Get name to set in external-ids. */
615 const char *name = od->nbs ? od->nbs->name : od->nbr->name;
616
617 /* Set external-ids. */
618 struct smap ids = SMAP_INITIALIZER(&ids);
619 smap_add(&ids, key, uuid_s);
620 if (*name) {
621 smap_add(&ids, "name", name);
622 }
623 sbrec_datapath_binding_set_external_ids(od->sb, &ids);
624 smap_destroy(&ids);
5868eb24
BP
625
626 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
627 }
628 destroy_tnlids(&dp_tnlids);
629 }
630
631 /* Delete southbound records without northbound matches. */
632 struct ovn_datapath *od, *next;
633 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
417e7e66 634 ovs_list_remove(&od->list);
5868eb24
BP
635 sbrec_datapath_binding_delete(od->sb);
636 ovn_datapath_destroy(datapaths, od);
637 }
638}
639\f
/* One logical port: either a logical switch port ('nbsp' nonnull) or a
 * logical router port ('nbrp' nonnull), optionally paired with a southbound
 * Port_Binding record. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks; /* MAC + networks from 'nbrp'. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S as its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
673
/* Creates a new ovn_port named 'key' (a switch port name, router port name,
 * or southbound logical_port), bound to northbound records 'nbsp'/'nbrp' and
 * southbound record 'sb' (each may be NULL), and indexes it into 'ports' by
 * name.  Free with ovn_port_destroy(). */
static struct ovn_port *
ovn_port_create(struct hmap *ports, const char *key,
                const struct nbrec_logical_switch_port *nbsp,
                const struct nbrec_logical_router_port *nbrp,
                const struct sbrec_port_binding *sb)
{
    struct ovn_port *op = xzalloc(sizeof *op);

    /* Pre-compute the JSON-quoted form of the name for use in logical flow
     * match expressions that reference this port. */
    struct ds json_key = DS_EMPTY_INITIALIZER;
    json_string_escape(key, &json_key);
    op->json_key = ds_steal_cstr(&json_key);

    op->key = xstrdup(key);
    op->sb = sb;
    op->nbsp = nbsp;
    op->nbrp = nbrp;
    hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
    return op;
}
693
/* Removes 'port' from 'ports' and frees all storage it owns: the extracted
 * switch-port and port-security address arrays, the router networks, and the
 * name strings.  A NULL 'port' is ignored. */
static void
ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
{
    if (port) {
        /* Don't remove port->list. It is used within build_ports() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(ports, &port->key_node);

        for (int i = 0; i < port->n_lsp_addrs; i++) {
            destroy_lport_addresses(&port->lsp_addrs[i]);
        }
        free(port->lsp_addrs);

        for (int i = 0; i < port->n_ps_addrs; i++) {
            destroy_lport_addresses(&port->ps_addrs[i]);
        }
        free(port->ps_addrs);

        destroy_lport_addresses(&port->lrp_networks);
        free(port->json_key);
        free(port->key);
        free(port);
    }
}
719
720static struct ovn_port *
721ovn_port_find(struct hmap *ports, const char *name)
722{
723 struct ovn_port *op;
724
725 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
726 if (!strcmp(op->key, name)) {
727 return op;
728 }
729 }
730 return NULL;
731}
732
733static uint32_t
734ovn_port_allocate_key(struct ovn_datapath *od)
735{
736 return allocate_tnlid(&od->port_tnlids, "port",
737 (1u << 15) - 1, &od->port_key_hint);
738}
739
8639f9be
ND
/* Returns true if '*ea' is already present in the global MACAM table.
 * 'mac64' must be the 64-bit form of '*ea'; it is used only for hashing.
 * Logs a rate-limited warning about the duplicate when 'warn' is set. */
static bool
ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn)
{
    struct macam_node *macam_node;
    HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64),
                             &macam) {
        if (eth_addr_equals(*ea, macam_node->mac_addr)) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT,
                             ETH_ADDR_ARGS(macam_node->mac_addr));
            }
            return true;
        }
    }
    return false;
}
757
/* Returns true if 'ip' (host byte order) is already recorded in 'od''s IPAM
 * table.  Logs a rate-limited warning when 'warn' is set. */
static bool
ipam_is_duplicate_ip(struct ovn_datapath *od, uint32_t ip, bool warn)
{
    struct ipam_node *ipam_node;
    HMAP_FOR_EACH_WITH_HASH (ipam_node, hmap_node, hash_int(ip, 0),
                             &od->ipam) {
        if (ipam_node->ip_addr == ip) {
            if (warn) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(&rl, "Duplicate IP set: "IP_FMT,
                             IP_ARGS(htonl(ip)));
            }
            return true;
        }
    }
    return false;
}
775
/* Records '*ea' in the global MACAM table.  MACs outside the OVN-managed
 * MAC_ADDR_PREFIX range are never tracked; duplicates are rejected (with a
 * warning) only when 'check' is set.  A NULL 'ea' is ignored. */
static void
ipam_insert_mac(struct eth_addr *ea, bool check)
{
    if (!ea) {
        return;
    }

    uint64_t mac64 = eth_addr_to_uint64(*ea);
    /* If the new MAC was not assigned by this address management system or
     * check is true and the new MAC is a duplicate, do not insert it into the
     * macam hmap. */
    if (((mac64 ^ MAC_ADDR_PREFIX) >> 24)
        || (check && ipam_is_duplicate_mac(ea, mac64, true))) {
        return;
    }

    struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node);
    new_macam_node->mac_addr = *ea;
    hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64));
}
796
797static void
798ipam_insert_ip(struct ovn_datapath *od, uint32_t ip, bool check)
799{
800 if (!od) {
801 return;
802 }
803
804 if (check && ipam_is_duplicate_ip(od, ip, true)) {
805 return;
806 }
807
808 struct ipam_node *new_ipam_node = xmalloc(sizeof *new_ipam_node);
809 new_ipam_node->ip_addr = ip;
810 hmap_insert(&od->ipam, &new_ipam_node->hmap_node, hash_int(ip, 0));
811}
812
/* Seeds the MACAM (and, when the switch has other_config:subnet, the IPAM)
 * tables with one entry of a logical switch port's addresses column.
 * "unknown" and "dynamic" entries carry no concrete addresses here and are
 * skipped. */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || is_dynamic_lsp_address(address)) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!smap_get(&od->nbs->other_config, "subnet")) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip, true);
    }

    destroy_lport_addresses(&laddrs);
}
844
/* Seeds the MACAM/IPAM tables with the addresses already assigned to 'op'.
 * For a logical switch port this covers the static addresses column plus any
 * previously allocated dynamic_addresses; for a logical router port it
 * records the port's MAC, and its IPv4 networks against the peered switch's
 * datapath when that switch has a subnet configured. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Router-port IPs matter for IPAM only when the peered logical
         * switch manages a subnet. */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip, true);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
884
885static uint64_t
886ipam_get_unused_mac(void)
887{
888 /* Stores the suffix of the most recently ipam-allocated MAC address. */
889 static uint32_t last_mac;
890
891 uint64_t mac64;
892 struct eth_addr mac;
893 uint32_t mac_addr_suffix, i;
894 for (i = 0; i < MAC_ADDR_SPACE - 1; i++) {
895 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
896 mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1;
897 mac64 = MAC_ADDR_PREFIX | mac_addr_suffix;
898 eth_addr_from_uint64(mac64, &mac);
899 if (!ipam_is_duplicate_mac(&mac, mac64, false)) {
900 last_mac = mac_addr_suffix;
901 break;
902 }
903 }
904
905 if (i == MAC_ADDR_SPACE) {
906 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
907 VLOG_WARN_RL(&rl, "MAC address space exhausted.");
908 mac64 = 0;
909 }
910
911 return mac64;
912}
913
/* Returns an unused IPv4 address (host byte order) inside 'subnet'/'mask',
 * or 0 if the subnet is exhausted.  The address is not reserved here; the
 * caller must ipam_insert_ip() it. */
static uint32_t
ipam_get_unused_ip(struct ovn_datapath *od, uint32_t subnet, uint32_t mask)
{
    if (!od) {
        return 0;
    }

    uint32_t ip = 0;

    /* Find an unused IP address in subnet. x.x.x.1 is reserved for a
     * logical router port. */
    for (uint32_t i = 2; i < ~mask; i++) {
        uint32_t tentative_ip = subnet + i;
        if (!ipam_is_duplicate_ip(od, tentative_ip, false)) {
            ip = tentative_ip;
            break;
        }
    }

    if (!ip) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
    }

    return ip;
}
940
/* Allocates a dynamic MAC/IPv4 pair for switch port 'op' within
 * 'subnet'/'mask' and writes "MAC IP" into the port's dynamic_addresses
 * column.  'addrspec' is the matching addresses-column entry: either the
 * bare keyword "dynamic", or "MAC dynamic" to keep an externally specified
 * MAC while allocating only the IP.  Returns true on success. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec, ovs_be32 subnet, ovs_be32 mask)
{
    if (!od || !op || !op->nbsp) {
        return false;
    }

    uint32_t ip = ipam_get_unused_ip(od, ntohl(subnet), ntohl(mask));
    if (!ip) {
        return false;
    }

    struct eth_addr mac;
    bool check_mac;
    int n = 0;

    /* "MAC dynamic": use the externally supplied MAC, but verify it is not
     * already taken.  Plain "dynamic": allocate a fresh MAC from MACAM. */
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        check_mac = true;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        check_mac = false;
    }

    /* Add MAC/IP to MACAM/IPAM hmaps if both addresses were allocated
     * successfully. */
    ipam_insert_ip(od, ip, false);
    ipam_insert_mac(&mac, check_mac);

    char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT,
                               ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip)));
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr);
    free(new_addr);

    return true;
}
983
/* Allocates dynamic addresses for every logical switch port that requests
 * them and does not have them yet. */
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            const char *subnet_str = smap_get(&od->nbs->other_config,
                                              "subnet");
            if (!subnet_str) {
                continue;
            }

            ovs_be32 subnet, mask;
            char *error = ip_parse_masked(subnet_str, &subnet, &mask);
            /* A /32 subnet (mask == OVS_BE32_MAX) leaves no room to
             * allocate, so it is rejected along with malformed CIDRs. */
            if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
                static struct vlog_rate_limit rl
                    = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
                free(error);
                continue;
            }

            struct ovn_port *op;
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp =
                    od->nbs->ports[i];

                if (!nbsp) {
                    continue;
                }

                op = ovn_port_find(ports, nbsp->name);
                if (!op || (op->nbsp && op->peer)) {
                    /* Do not allocate addresses for logical switch ports that
                     * have a peer. */
                    continue;
                }

                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    /* Only the first "dynamic" entry is honored per port
                     * (note the unconditional break below). */
                    if (is_dynamic_lsp_address(nbsp->addresses[j])
                        && !nbsp->dynamic_addresses) {
                        /* NOTE(review): this writes into
                         * op->lsp_addrs[op->n_lsp_addrs] -- assumes the array
                         * was sized to leave room for dynamic entries; verify
                         * against where lsp_addrs is allocated. */
                        if (!ipam_allocate_addresses(od, op,
                                             nbsp->addresses[j], subnet, mask)
                            || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                            static struct vlog_rate_limit rl
                                = VLOG_RATE_LIMIT_INIT(1, 1);
                            VLOG_INFO_RL(&rl, "Failed to allocate address.");
                        } else {
                            op->n_lsp_addrs++;
                        }
                        break;
                    }
                }
            }
        }
    }
}
1050\f
b511690b
GS
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node;      /* In a "struct hmap" keyed by
                                      * hash_string(parent_name, 0). */
    char *parent_name;               /* Owned copy of the parent port name. */
    unsigned long *allocated_tags;   /* A bitmap to track allocated tags. */
};
1060
1061static void
1062tag_alloc_destroy(struct hmap *tag_alloc_table)
1063{
1064 struct tag_alloc_node *node;
1065 HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) {
1066 bitmap_free(node->allocated_tags);
1067 free(node->parent_name);
1068 free(node);
1069 }
1070 hmap_destroy(tag_alloc_table);
1071}
1072
1073static struct tag_alloc_node *
1074tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
1075{
1076 /* If a node for the 'parent_name' exists, return it. */
1077 struct tag_alloc_node *tag_alloc_node;
1078 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
1079 hash_string(parent_name, 0),
1080 tag_alloc_table) {
1081 if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
1082 return tag_alloc_node;
1083 }
1084 }
1085
1086 /* Create a new node. */
1087 tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
1088 tag_alloc_node->parent_name = xstrdup(parent_name);
1089 tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
1090 /* Tag 0 is invalid for nested containers. */
1091 bitmap_set1(tag_alloc_node->allocated_tags, 0);
1092 hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
1093 hash_string(parent_name, 0));
1094
1095 return tag_alloc_node;
1096}
1097
1098static void
1099tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
1100 const struct nbrec_logical_switch_port *nbsp)
1101{
1102 /* Add the tags of already existing nested containers. If there is no
1103 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1104 if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
1105 return;
1106 }
1107
1108 struct tag_alloc_node *tag_alloc_node;
1109 tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
1110 bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
1111}
1112
/* Satisfies a port's tag_request, if any:
 *
 *   - tag_request == 0 on a nested container: allocate the lowest free tag
 *     for its parent and write it to the NB 'tag' column (unless a tag is
 *     already set).
 *
 *   - tag_request != 0: copy the requested value into 'tag' verbatim. */
static void
tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
                         const struct nbrec_logical_switch_port *nbsp)
{
    if (!nbsp->tag_request) {
        return;
    }

    if (nbsp->parent_name && nbsp->parent_name[0]
        && *nbsp->tag_request == 0) {
        /* For nested containers that need allocation, do the allocation. */

        if (nbsp->tag) {
            /* This has already been allocated. */
            return;
        }

        struct tag_alloc_node *tag_alloc_node;
        int64_t tag;
        tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
                                            nbsp->parent_name);
        /* Find the first clear bit, i.e. the lowest unused tag >= 1. */
        tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
        if (tag == MAX_OVN_TAGS) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
                        "parent %s", nbsp->parent_name);
            return;
        }
        bitmap_set1(tag_alloc_node->allocated_tags, tag);
        nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
    } else if (*nbsp->tag_request != 0) {
        /* For everything else, copy the contents of 'tag_request' to 'tag'. */
        nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
    }
}
1148\f
8639f9be 1149
5868eb24
BP
/* Builds the 'ports' hmap of "struct ovn_port" by joining the southbound
 * Port_Binding table with the northbound switch and router port tables.
 *
 * On return the three lists partition all ports:
 *   - 'sb_only':  SB bindings with no NB counterpart (to be deleted),
 *   - 'nb_only':  NB ports with no SB binding yet (to be inserted),
 *   - 'both':     ports present in both databases.
 *
 * Also parses each port's addresses/port_security into op->lsp_addrs /
 * op->ps_addrs (switch ports) or op->lrp_networks (router ports), seeds the
 * IPAM maps, records existing container tags in 'tag_alloc_table', records
 * in-use qdisc queue IDs in 'chassis_qdisc_queues', and wires up
 * router/switch port peer pointers. */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start by assuming every SB binding is orphaned; ports matched against
     * NB rows below are moved from 'sb_only' to 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: join its NB switch ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Remember queue IDs already claimed on the chassis so
                     * new allocations do not collide. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues,
                            &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse the addresses column; "unknown" entries and dynamic
                 * entries that have not been allocated yet are skipped. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                             op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                     op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse port_security entries the same way. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                     op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: join its NB router ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A router port without any network is useless; skip it. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            /* Link both directions and remember this switch port on the
             * datapath's list of router attachments. */
            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;
        } else if (op->nbrp && op->nbrp->peer) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1346
1347static void
a6095f81
BS
1348ovn_port_update_sbrec(const struct ovn_port *op,
1349 struct hmap *chassis_qdisc_queues)
5868eb24
BP
1350{
1351 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
0ee00741 1352 if (op->nbrp) {
c1645003 1353 /* If the router is for l3 gateway, it resides on a chassis
17bac0ff 1354 * and its port type is "l3gateway". */
c1645003
GS
1355 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
1356 if (chassis) {
17bac0ff 1357 sbrec_port_binding_set_type(op->sb, "l3gateway");
c1645003
GS
1358 } else {
1359 sbrec_port_binding_set_type(op->sb, "patch");
1360 }
9975d7be
BP
1361
1362 const char *peer = op->peer ? op->peer->key : "<error>";
c1645003
GS
1363 struct smap new;
1364 smap_init(&new);
1365 smap_add(&new, "peer", peer);
1366 if (chassis) {
17bac0ff 1367 smap_add(&new, "l3gateway-chassis", chassis);
c1645003
GS
1368 }
1369 sbrec_port_binding_set_options(op->sb, &new);
1370 smap_destroy(&new);
9975d7be
BP
1371
1372 sbrec_port_binding_set_parent_port(op->sb, NULL);
1373 sbrec_port_binding_set_tag(op->sb, NULL, 0);
1374 sbrec_port_binding_set_mac(op->sb, NULL, 0);
1375 } else {
0ee00741 1376 if (strcmp(op->nbsp->type, "router")) {
a6095f81
BS
1377 uint32_t queue_id = smap_get_int(
1378 &op->sb->options, "qdisc_queue_id", 0);
1379 bool has_qos = port_has_qos_params(&op->nbsp->options);
1380 struct smap options;
1381
1382 if (op->sb->chassis && has_qos && !queue_id) {
1383 queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
1384 op->sb->chassis);
1385 } else if (!has_qos && queue_id) {
1386 free_chassis_queueid(chassis_qdisc_queues,
1387 op->sb->chassis,
1388 queue_id);
1389 queue_id = 0;
1390 }
1391
1392 smap_clone(&options, &op->nbsp->options);
1393 if (queue_id) {
1394 smap_add_format(&options,
1395 "qdisc_queue_id", "%d", queue_id);
1396 }
1397 sbrec_port_binding_set_options(op->sb, &options);
1398 smap_destroy(&options);
0ee00741 1399 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
9975d7be 1400 } else {
c1645003
GS
1401 const char *chassis = NULL;
1402 if (op->peer && op->peer->od && op->peer->od->nbr) {
1403 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
1404 }
1405
1406 /* A switch port connected to a gateway router is also of
17bac0ff 1407 * type "l3gateway". */
c1645003 1408 if (chassis) {
17bac0ff 1409 sbrec_port_binding_set_type(op->sb, "l3gateway");
c1645003
GS
1410 } else {
1411 sbrec_port_binding_set_type(op->sb, "patch");
1412 }
9975d7be 1413
f99f67bd
BP
1414 const char *router_port = smap_get_def(&op->nbsp->options,
1415 "router-port", "<error>");
c1645003
GS
1416 struct smap new;
1417 smap_init(&new);
1418 smap_add(&new, "peer", router_port);
1419 if (chassis) {
17bac0ff 1420 smap_add(&new, "l3gateway-chassis", chassis);
c1645003 1421 }
8439c2eb
CSV
1422
1423 const char *nat_addresses = smap_get(&op->nbsp->options,
1424 "nat-addresses");
1425 if (nat_addresses) {
1426 struct lport_addresses laddrs;
1427 if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
1428 static struct vlog_rate_limit rl =
1429 VLOG_RATE_LIMIT_INIT(1, 1);
1430 VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
1431 } else {
1432 smap_add(&new, "nat-addresses", nat_addresses);
1433 destroy_lport_addresses(&laddrs);
1434 }
1435 }
c1645003
GS
1436 sbrec_port_binding_set_options(op->sb, &new);
1437 smap_destroy(&new);
9975d7be 1438 }
0ee00741
HK
1439 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
1440 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
1441 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
1442 op->nbsp->n_addresses);
9975d7be 1443 }
5868eb24
BP
1444}
1445
6e31816f
CSV
1446/* Remove mac_binding entries that refer to logical_ports which are
1447 * deleted. */
1448static void
1449cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports)
1450{
1451 const struct sbrec_mac_binding *b, *n;
1452 SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) {
1453 if (!ovn_port_find(ports, b->logical_port)) {
1454 sbrec_mac_binding_delete(b);
1455 }
1456 }
1457}
1458
0bac7164 1459/* Updates the southbound Port_Binding table so that it contains the logical
80f408f4 1460 * switch ports specified by the northbound database.
0bac7164
BP
1461 *
1462 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
1463 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
1464 * datapaths. */
5868eb24
BP
1465static void
1466build_ports(struct northd_context *ctx, struct hmap *datapaths,
1467 struct hmap *ports)
1468{
1469 struct ovs_list sb_only, nb_only, both;
a6095f81
BS
1470 struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
1471 struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);
5868eb24 1472
a6095f81
BS
1473 join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
1474 &tag_alloc_table, &sb_only, &nb_only, &both);
5868eb24 1475
5868eb24 1476 struct ovn_port *op, *next;
b511690b
GS
1477 /* For logical ports that are in both databases, update the southbound
1478 * record based on northbound data. Also index the in-use tunnel_keys.
1479 * For logical ports that are in NB database, do any tag allocation
1480 * needed. */
5868eb24 1481 LIST_FOR_EACH_SAFE (op, next, list, &both) {
b511690b
GS
1482 if (op->nbsp) {
1483 tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
1484 }
a6095f81 1485 ovn_port_update_sbrec(op, &chassis_qdisc_queues);
5868eb24
BP
1486
1487 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
1488 if (op->sb->tunnel_key > op->od->port_key_hint) {
1489 op->od->port_key_hint = op->sb->tunnel_key;
1490 }
1491 }
1492
1493 /* Add southbound record for each unmatched northbound record. */
1494 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
1495 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
1496 if (!tunnel_key) {
1497 continue;
1498 }
1499
1500 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
a6095f81 1501 ovn_port_update_sbrec(op, &chassis_qdisc_queues);
5868eb24
BP
1502
1503 sbrec_port_binding_set_logical_port(op->sb, op->key);
1504 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
1505 }
1506
6e31816f
CSV
1507 bool remove_mac_bindings = false;
1508 if (!ovs_list_is_empty(&sb_only)) {
1509 remove_mac_bindings = true;
1510 }
1511
5868eb24
BP
1512 /* Delete southbound records without northbound matches. */
1513 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
417e7e66 1514 ovs_list_remove(&op->list);
5868eb24
BP
1515 sbrec_port_binding_delete(op->sb);
1516 ovn_port_destroy(ports, op);
1517 }
6e31816f
CSV
1518 if (remove_mac_bindings) {
1519 cleanup_mac_bindings(ctx, ports);
1520 }
b511690b
GS
1521
1522 tag_alloc_destroy(&tag_alloc_table);
a6095f81 1523 destroy_chassis_queues(&chassis_qdisc_queues);
5868eb24
BP
1524}
1525\f
/* Multicast group tunnel keys occupy the top half of the 16-bit key space;
 * unicast port keys live below OVN_MIN_MULTICAST. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Flood group: all ports on a datapath. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Unknown group: ports that accept packets with unknown destination MACs. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
1539
1540static bool
1541multicast_group_equal(const struct multicast_group *a,
1542 const struct multicast_group *b)
1543{
1544 return !strcmp(a->name, b->name) && a->key == b->key;
1545}
1546
/* Multicast group entry. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Member ports, grown on demand ('allocated_ports' is the capacity). */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
1556
/* Hash for indexing an ovn_multicast by (datapath, group key). */
static uint32_t
ovn_multicast_hash(const struct ovn_datapath *datapath,
                   const struct multicast_group *group)
{
    return hash_pointer(datapath, group->key);
}
1563
/* Returns the ovn_multicast for 'datapath'/'group' in 'mcgroups', or NULL
 * if none has been added yet. */
static struct ovn_multicast *
ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
                   const struct multicast_group *group)
{
    struct ovn_multicast *mc;

    HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
                             ovn_multicast_hash(datapath, group), mcgroups) {
        if (mc->datapath == datapath
            && multicast_group_equal(mc->group, group)) {
            return mc;
        }
    }
    return NULL;
}
1579
/* Adds 'port' to the multicast group 'group' on the port's datapath,
 * creating the group entry in 'mcgroups' on first use.  Membership is a
 * growable array; duplicates are not checked here. */
static void
ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
                  struct ovn_port *port)
{
    struct ovn_datapath *od = port->od;
    struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
    if (!mc) {
        /* First member: create the group with a small initial capacity. */
        mc = xmalloc(sizeof *mc);
        hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
        mc->datapath = od;
        mc->group = group;
        mc->n_ports = 0;
        mc->allocated_ports = 4;
        mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
    }
    if (mc->n_ports >= mc->allocated_ports) {
        mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
                               sizeof *mc->ports);
    }
    mc->ports[mc->n_ports++] = port;
}
4edcdcf4 1601
5868eb24
BP
1602static void
1603ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
1604{
1605 if (mc) {
1606 hmap_remove(mcgroups, &mc->hmap_node);
1607 free(mc->ports);
1608 free(mc);
1609 }
1610}
4edcdcf4 1611
5868eb24
BP
/* Copies the membership of 'mc' into the southbound Multicast_Group row
 * 'sb' as an array of Port_Binding references. */
static void
ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
                           const struct sbrec_multicast_group *sb)
{
    struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
    for (size_t i = 0; i < mc->n_ports; i++) {
        ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
    }
    sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
    free(ports);
}
bd39395f 1623\f
48605550 1624/* Logical flow generation.
bd39395f 1625 *
48605550 1626 * This code generates the Logical_Flow table in the southbound database, as a
bd39395f
BP
1627 * function of most of the northbound database.
1628 */
1629
5868eb24
BP
/* One logical flow, keyed by (datapath, stage, priority, match, actions);
 * mirrors a row in the southbound Logical_Flow table. */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;
    enum ovn_stage stage;
    uint16_t priority;
    char *match;        /* Owned; freed in ovn_lflow_destroy(). */
    char *actions;      /* Owned; freed in ovn_lflow_destroy(). */
};
1639
/* Hash over all key fields of 'lflow' (datapath, stage, priority, match,
 * actions), consistent with ovn_lflow_equal(). */
static size_t
ovn_lflow_hash(const struct ovn_lflow *lflow)
{
    size_t hash = uuid_hash(&lflow->od->key);
    hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
    hash = hash_string(lflow->match, hash);
    return hash_string(lflow->actions, hash);
}
1648
5868eb24
BP
1649static bool
1650ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1651{
1652 return (a->od == b->od
880fcd14 1653 && a->stage == b->stage
5868eb24
BP
1654 && a->priority == b->priority
1655 && !strcmp(a->match, b->match)
1656 && !strcmp(a->actions, b->actions));
1657}
1658
1659static void
1660ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
880fcd14 1661 enum ovn_stage stage, uint16_t priority,
5868eb24 1662 char *match, char *actions)
bd39395f 1663{
5868eb24 1664 lflow->od = od;
880fcd14 1665 lflow->stage = stage;
5868eb24
BP
1666 lflow->priority = priority;
1667 lflow->match = match;
1668 lflow->actions = actions;
bd39395f
BP
1669}
1670
/* Adds a row with the specified contents to the Logical_Flow table. */
static void
ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
              enum ovn_stage stage, uint16_t priority,
              const char *match, const char *actions)
{
    /* A stage is only meaningful on the matching datapath type (switch vs.
     * router); catch mismatches early. */
    ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));

    struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
    /* The lflow takes ownership of fresh copies of 'match' and 'actions'. */
    ovn_lflow_init(lflow, od, stage, priority,
                   xstrdup(match), xstrdup(actions));
    hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
}
1684
/* Looks up the logical flow in 'lflows' matching all five key fields;
 * returns it, or NULL if absent. */
static struct ovn_lflow *
ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
               enum ovn_stage stage, uint16_t priority,
               const char *match, const char *actions)
{
    /* Build a stack-allocated key; the CONST_CASTs are safe because the
     * target is only read during hashing/comparison. */
    struct ovn_lflow target;
    ovn_lflow_init(&target, od, stage, priority,
                   CONST_CAST(char *, match), CONST_CAST(char *, actions));

    struct ovn_lflow *lflow;
    HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
                             lflows) {
        if (ovn_lflow_equal(lflow, &target)) {
            return lflow;
        }
    }
    return NULL;
}
bd39395f 1703
5868eb24
BP
1704static void
1705ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1706{
1707 if (lflow) {
1708 hmap_remove(lflows, &lflow->hmap_node);
1709 free(lflow->match);
1710 free(lflow->actions);
1711 free(lflow);
1712 }
bd39395f
BP
1713}
1714
bd39395f 1715/* Appends port security constraints on L2 address field 'eth_addr_field'
e93b43d6
JP
1716 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
1717 * elements, is the collection of port_security constraints from an
1718 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
bd39395f 1719static void
685f4dfe 1720build_port_security_l2(const char *eth_addr_field,
e93b43d6
JP
1721 struct lport_addresses *ps_addrs,
1722 unsigned int n_ps_addrs,
685f4dfe 1723 struct ds *match)
bd39395f 1724{
e93b43d6
JP
1725 if (!n_ps_addrs) {
1726 return;
1727 }
bd39395f 1728
e93b43d6 1729 ds_put_format(match, " && %s == {", eth_addr_field);
f7cb14cd 1730
e93b43d6
JP
1731 for (size_t i = 0; i < n_ps_addrs; i++) {
1732 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
bd39395f 1733 }
f7cb14cd 1734 ds_chomp(match, ' ');
bd39395f 1735 ds_put_cstr(match, "}");
bd39395f
BP
1736}
1737
685f4dfe
NS
1738static void
1739build_port_security_ipv6_nd_flow(
1740 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1741 int n_ipv6_addrs)
1742{
1743 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1744 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1745 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1746 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1747 ETH_ADDR_ARGS(ea));
1748 if (!n_ipv6_addrs) {
1749 ds_put_cstr(match, "))");
1750 return;
1751 }
1752
1753 char ip6_str[INET6_ADDRSTRLEN + 1];
1754 struct in6_addr lla;
1755 in6_generate_lla(ea, &lla);
1756 memset(ip6_str, 0, sizeof(ip6_str));
1757 ipv6_string_mapped(ip6_str, &lla);
1758 ds_put_format(match, " && (nd.target == %s", ip6_str);
1759
1760 for(int i = 0; i < n_ipv6_addrs; i++) {
1761 memset(ip6_str, 0, sizeof(ip6_str));
1762 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1763 ds_put_format(match, " || nd.target == %s", ip6_str);
1764 }
1765
1766 ds_put_format(match, ")))");
1767}
1768
/* Appends an IPv6 L3 port-security clause to 'match': restricts ip6.src
 * (ingress) or ip6.dst (egress) to the MAC-derived link-local address,
 * 'ipv6_addrs', and — on egress only — the ff00::/8 multicast range. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
1798
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only entries permit ARP and ND without L3 restriction. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa. If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Trim the trailing ", " before closing the set. */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port not matched above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
1878
1879/**
1880 * Build port security constraints on IPv4 and IPv6 src and dst fields
1881 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1882 *
1883 * For each port security of the logical port, following
1884 * logical flows are added
1885 * - If the port security has IPv4 addresses,
1886 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1887 *
1888 * - If the port security has IPv6 addresses,
1889 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1890 *
1891 * - If the port security has IPv4 addresses or IPv6 addresses or both
1892 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
1893 */
1894static void
1895build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
1896 struct hmap *lflows)
1897{
1898 char *port_direction;
1899 enum ovn_stage stage;
1900 if (pipeline == P_IN) {
1901 port_direction = "inport";
1902 stage = S_SWITCH_IN_PORT_SEC_IP;
1903 } else {
1904 port_direction = "outport";
1905 stage = S_SWITCH_OUT_PORT_SEC_IP;
1906 }
1907
e93b43d6
JP
1908 for (size_t i = 0; i < op->n_ps_addrs; i++) {
1909 struct lport_addresses *ps = &op->ps_addrs[i];
685f4dfe 1910
e93b43d6 1911 if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
685f4dfe
NS
1912 continue;
1913 }
1914
e93b43d6 1915 if (ps->n_ipv4_addrs) {
685f4dfe
NS
1916 struct ds match = DS_EMPTY_INITIALIZER;
1917 if (pipeline == P_IN) {
9e687b23
DL
1918 /* Permit use of the unspecified address for DHCP discovery */
1919 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
1920 ds_put_format(&dhcp_match, "inport == %s"
e93b43d6 1921 " && eth.src == %s"
9e687b23
DL
1922 " && ip4.src == 0.0.0.0"
1923 " && ip4.dst == 255.255.255.255"
e93b43d6
JP
1924 " && udp.src == 68 && udp.dst == 67",
1925 op->json_key, ps->ea_s);
9e687b23
DL
1926 ovn_lflow_add(lflows, op->od, stage, 90,
1927 ds_cstr(&dhcp_match), "next;");
1928 ds_destroy(&dhcp_match);
e93b43d6 1929 ds_put_format(&match, "inport == %s && eth.src == %s"
9e687b23 1930 " && ip4.src == {", op->json_key,
e93b43d6 1931 ps->ea_s);
685f4dfe 1932 } else {
e93b43d6 1933 ds_put_format(&match, "outport == %s && eth.dst == %s"
685f4dfe 1934 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
e93b43d6 1935 op->json_key, ps->ea_s);
685f4dfe
NS
1936 }
1937
f95523c0
JP
1938 for (int j = 0; j < ps->n_ipv4_addrs; j++) {
1939 ovs_be32 mask = ps->ipv4_addrs[j].mask;
7d9d86ad
NS
1940 /* When the netmask is applied, if the host portion is
1941 * non-zero, the host can only use the specified
1942 * address. If zero, the host is allowed to use any
1943 * address in the subnet.
e93b43d6 1944 */
f95523c0
JP
1945 if (ps->ipv4_addrs[j].plen == 32
1946 || ps->ipv4_addrs[j].addr & ~mask) {
1947 ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
1948 if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
e93b43d6
JP
1949 /* Host is also allowed to receive packets to the
1950 * broadcast address in the specified subnet. */
1951 ds_put_format(&match, ", %s",
f95523c0 1952 ps->ipv4_addrs[j].bcast_s);
7d9d86ad
NS
1953 }
1954 } else {
1955 /* host portion is zero */
f95523c0
JP
1956 ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
1957 ps->ipv4_addrs[j].plen);
7d9d86ad
NS
1958 }
1959 ds_put_cstr(&match, ", ");
685f4dfe
NS
1960 }
1961
1962 /* Replace ", " by "}". */
1963 ds_chomp(&match, ' ');
1964 ds_chomp(&match, ',');
1965 ds_put_cstr(&match, "}");
1966 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
1967 ds_destroy(&match);
685f4dfe
NS
1968 }
1969
e93b43d6 1970 if (ps->n_ipv6_addrs) {
685f4dfe 1971 struct ds match = DS_EMPTY_INITIALIZER;
9e687b23
DL
1972 if (pipeline == P_IN) {
1973 /* Permit use of unspecified address for duplicate address
1974 * detection */
1975 struct ds dad_match = DS_EMPTY_INITIALIZER;
1976 ds_put_format(&dad_match, "inport == %s"
e93b43d6 1977 " && eth.src == %s"
9e687b23
DL
1978 " && ip6.src == ::"
1979 " && ip6.dst == ff02::/16"
1980 " && icmp6.type == {131, 135, 143}", op->json_key,
e93b43d6 1981 ps->ea_s);
9e687b23
DL
1982 ovn_lflow_add(lflows, op->od, stage, 90,
1983 ds_cstr(&dad_match), "next;");
1984 ds_destroy(&dad_match);
1985 }
e93b43d6 1986 ds_put_format(&match, "%s == %s && %s == %s",
685f4dfe 1987 port_direction, op->json_key,
e93b43d6
JP
1988 pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
1989 build_port_security_ipv6_flow(pipeline, &match, ps->ea,
1990 ps->ipv6_addrs, ps->n_ipv6_addrs);
685f4dfe
NS
1991 ovn_lflow_add(lflows, op->od, stage, 90,
1992 ds_cstr(&match), "next;");
1993 ds_destroy(&match);
685f4dfe
NS
1994 }
1995
e93b43d6
JP
1996 char *match = xasprintf("%s == %s && %s == %s && ip",
1997 port_direction, op->json_key,
1998 pipeline == P_IN ? "eth.src" : "eth.dst",
1999 ps->ea_s);
685f4dfe
NS
2000 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
2001 free(match);
2002 }
f2a715b5 2003
685f4dfe
NS
2004}
2005
95a9a275 2006static bool
80f408f4 2007lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
95a9a275 2008{
80f408f4 2009 return !lsp->enabled || *lsp->enabled;
95a9a275
RB
2010}
2011
4c7bf534 2012static bool
80f408f4 2013lsp_is_up(const struct nbrec_logical_switch_port *lsp)
4c7bf534 2014{
80f408f4 2015 return !lsp->up || *lsp->up;
4c7bf534
NS
2016}
2017
281977f7
NS
2018static bool
2019build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
2020 struct ds *options_action, struct ds *response_action)
2021{
2022 if (!op->nbsp->dhcpv4_options) {
2023 /* CMS has disabled native DHCPv4 for this lport. */
2024 return false;
2025 }
2026
2027 ovs_be32 host_ip, mask;
2028 char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
2029 &mask);
2030 if (error || ((offer_ip ^ host_ip) & mask)) {
2031 /* Either
2032 * - cidr defined is invalid or
2033 * - the offer ip of the logical port doesn't belong to the cidr
2034 * defined in the DHCPv4 options.
2035 * */
2036 free(error);
2037 return false;
2038 }
2039
2040 const char *server_ip = smap_get(
2041 &op->nbsp->dhcpv4_options->options, "server_id");
2042 const char *server_mac = smap_get(
2043 &op->nbsp->dhcpv4_options->options, "server_mac");
2044 const char *lease_time = smap_get(
2045 &op->nbsp->dhcpv4_options->options, "lease_time");
2046 const char *router = smap_get(
2047 &op->nbsp->dhcpv4_options->options, "router");
2048
2049 if (!(server_ip && server_mac && lease_time && router)) {
2050 /* "server_id", "server_mac", "lease_time" and "router" should be
2051 * present in the dhcp_options. */
2052 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2053 VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
2054 op->json_key);
2055 return false;
2056 }
2057
2058 struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
2059 smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);
2060
2061 /* server_mac is not DHCPv4 option, delete it from the smap. */
2062 smap_remove(&dhcpv4_options, "server_mac");
2063 char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
2064 smap_add(&dhcpv4_options, "netmask", netmask);
2065 free(netmask);
2066
2067 ds_put_format(options_action,
2068 REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
2069 IP_FMT", ", IP_ARGS(offer_ip));
2070 struct smap_node *node;
2071 SMAP_FOR_EACH(node, &dhcpv4_options) {
2072 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2073 }
2074
2075 ds_chomp(options_action, ' ');
2076 ds_chomp(options_action, ',');
2077 ds_put_cstr(options_action, "); next;");
2078
2079 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2080 "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
bf143492
JP
2081 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2082 "output;",
281977f7
NS
2083 server_mac, IP_ARGS(offer_ip), server_ip);
2084
2085 smap_destroy(&dhcpv4_options);
2086 return true;
2087}
2088
33ac3c83
NS
2089static bool
2090build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip,
2091 struct ds *options_action, struct ds *response_action)
2092{
2093 if (!op->nbsp->dhcpv6_options) {
2094 /* CMS has disabled native DHCPv6 for this lport. */
2095 return false;
2096 }
2097
2098 struct in6_addr host_ip, mask;
2099
2100 char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip,
2101 &mask);
2102 if (error) {
2103 free(error);
2104 return false;
2105 }
2106 struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip);
2107 ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask);
2108 if (!ipv6_mask_is_any(&ip6_mask)) {
2109 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2110 * options.*/
2111 return false;
2112 }
2113
2114 /* "server_id" should be the MAC address. */
2115 const char *server_mac = smap_get(&op->nbsp->dhcpv6_options->options,
2116 "server_id");
2117 struct eth_addr ea;
2118 if (!server_mac || !eth_addr_from_string(server_mac, &ea)) {
2119 /* "server_id" should be present in the dhcpv6_options. */
2120 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2121 VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options"
2122 " for lport %s", op->json_key);
2123 return false;
2124 }
2125
2126 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2127 struct in6_addr lla;
2128 in6_generate_lla(ea, &lla);
2129
2130 char server_ip[INET6_ADDRSTRLEN + 1];
2131 ipv6_string_mapped(server_ip, &lla);
2132
2133 char ia_addr[INET6_ADDRSTRLEN + 1];
2134 ipv6_string_mapped(ia_addr, offer_ip);
2135
2136 ds_put_format(options_action,
40df4566
ZKL
2137 REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(");
2138
2139 /* Check whether the dhcpv6 options should be configured as stateful.
2140 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2141 if (!smap_get_bool(&op->nbsp->dhcpv6_options->options,
2142 "dhcpv6_stateless", false)) {
2143 char ia_addr[INET6_ADDRSTRLEN + 1];
2144 ipv6_string_mapped(ia_addr, offer_ip);
2145
2146 ds_put_format(options_action, "ia_addr = %s, ", ia_addr);
2147 }
2148
33ac3c83
NS
2149 struct smap_node *node;
2150 SMAP_FOR_EACH (node, &op->nbsp->dhcpv6_options->options) {
40df4566
ZKL
2151 if (strcmp(node->key, "dhcpv6_stateless")) {
2152 ds_put_format(options_action, "%s = %s, ", node->key, node->value);
2153 }
33ac3c83
NS
2154 }
2155 ds_chomp(options_action, ' ');
2156 ds_chomp(options_action, ',');
2157 ds_put_cstr(options_action, "); next;");
2158
2159 ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
2160 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2161 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2162 "output;",
2163 server_mac, server_ip);
40df4566 2164
33ac3c83
NS
2165 return true;
2166}
2167
78aab811
JP
2168static bool
2169has_stateful_acl(struct ovn_datapath *od)
2170{
9975d7be
BP
2171 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2172 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811
JP
2173 if (!strcmp(acl->action, "allow-related")) {
2174 return true;
2175 }
2176 }
2177
2178 return false;
2179}
2180
2181static void
9ab989b7 2182build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
78aab811
JP
2183{
2184 bool has_stateful = has_stateful_acl(od);
2185
2186 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2187 * allowed by default. */
880fcd14
BP
2188 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
2189 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
78aab811 2190
c132fca0 2191 /* If there are any stateful ACL rules in this datapath, we must
78aab811
JP
2192 * send all IP packets through the conntrack action, which handles
2193 * defragmentation, in order to match L4 headers. */
2194 if (has_stateful) {
9ab989b7
BP
2195 for (size_t i = 0; i < od->n_router_ports; i++) {
2196 struct ovn_port *op = od->router_ports[i];
2197 /* Can't use ct() for router ports. Consider the
2198 * following configuration: lp1(10.0.0.2) on
2199 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2200 * ping from lp1 to lp2, First, the response will go
2201 * through ct() with a zone for lp2 in the ls2 ingress
2202 * pipeline on hostB. That ct zone knows about this
2203 * connection. Next, it goes through ct() with the zone
2204 * for the router port in the egress pipeline of ls2 on
2205 * hostB. This zone does not know about the connection,
2206 * as the icmp request went through the logical router
2207 * on hostA, not hostB. This would only work with
2208 * distributed conntrack state across all chassis. */
2209 struct ds match_in = DS_EMPTY_INITIALIZER;
2210 struct ds match_out = DS_EMPTY_INITIALIZER;
2211
2212 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
2213 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
2214 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
2215 ds_cstr(&match_in), "next;");
2216 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
2217 ds_cstr(&match_out), "next;");
2218
2219 ds_destroy(&match_in);
2220 ds_destroy(&match_out);
48fcdb47 2221 }
2d018f9b
GS
2222 /* Ingress and Egress Pre-ACL Table (Priority 110).
2223 *
2224 * Not to do conntrack on ND packets. */
2225 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
2226 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
48fcdb47 2227
78aab811
JP
2228 /* Ingress and Egress Pre-ACL Table (Priority 100).
2229 *
2230 * Regardless of whether the ACL is "from-lport" or "to-lport",
2231 * we need rules in both the ingress and egress table, because
facf8652
GS
2232 * the return traffic needs to be followed.
2233 *
2234 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2235 * it to conntrack for tracking and defragmentation. */
2236 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
2237 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2238 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
2239 REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2d018f9b
GS
2240 }
2241}
78aab811 2242
7a15be69
GS
2243/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2244 * 'ip_address'. The caller must free() the memory allocated for
2245 * 'ip_address'. */
2246static void
2247ip_address_and_port_from_lb_key(const char *key, char **ip_address,
2248 uint16_t *port)
2249{
2250 char *ip_str, *start, *next;
2251 *ip_address = NULL;
2252 *port = 0;
2253
2254 next = start = xstrdup(key);
2255 ip_str = strsep(&next, ":");
2256 if (!ip_str || !ip_str[0]) {
2257 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2258 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2259 free(start);
2260 return;
2261 }
2262
2263 ovs_be32 ip, mask;
2264 char *error = ip_parse_masked(ip_str, &ip, &mask);
2265 if (error || mask != OVS_BE32_MAX) {
2266 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2267 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
2268 free(start);
2269 free(error);
2270 return;
2271 }
2272
2273 int l4_port = 0;
2274 if (next && next[0]) {
2275 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
2276 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2277 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
2278 free(start);
2279 return;
2280 }
2281 }
2282
2283 *port = l4_port;
2284 *ip_address = strdup(ip_str);
2285 free(start);
2286}
2287
2288static void
2289build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
2290{
2291 /* Allow all packets to go to next tables by default. */
2292 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
2293 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
2294
2295 struct sset all_ips = SSET_INITIALIZER(&all_ips);
61591ad9
GS
2296 bool vip_configured = false;
2297 for (int i = 0; i < od->nbs->n_load_balancer; i++) {
2298 struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
7a15be69
GS
2299 struct smap *vips = &lb->vips;
2300 struct smap_node *node;
7a15be69
GS
2301
2302 SMAP_FOR_EACH (node, vips) {
2303 vip_configured = true;
2304
2305 /* node->key contains IP:port or just IP. */
2306 char *ip_address = NULL;
2307 uint16_t port;
2308 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
2309 if (!ip_address) {
2310 continue;
2311 }
2312
2313 if (!sset_contains(&all_ips, ip_address)) {
2314 sset_add(&all_ips, ip_address);
2315 }
2316
2317 free(ip_address);
2318
2319 /* Ignore L4 port information in the key because fragmented packets
2320 * may not have L4 information. The pre-stateful table will send
2321 * the packet through ct() action to de-fragment. In stateful
2322 * table, we will eventually look at L4 information. */
2323 }
61591ad9 2324 }
7a15be69 2325
61591ad9
GS
2326 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2327 * packet to conntrack for defragmentation. */
2328 const char *ip_address;
2329 SSET_FOR_EACH(ip_address, &all_ips) {
2330 char *match = xasprintf("ip && ip4.dst == %s", ip_address);
2331 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
2332 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
2333 free(match);
2334 }
7a15be69 2335
61591ad9 2336 sset_destroy(&all_ips);
7a15be69 2337
61591ad9
GS
2338 if (vip_configured) {
2339 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
2340 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
7a15be69
GS
2341 }
2342}
2343
facf8652
GS
2344static void
2345build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
2346{
2347 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
2348 * allowed by default. */
2349 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
2350 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
2351
2352 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
2353 * sent to conntrack for tracking and defragmentation. */
2354 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
2355 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2356 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
2357 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
2358}
2359
2d018f9b
GS
2360static void
2361build_acls(struct ovn_datapath *od, struct hmap *lflows)
2362{
2363 bool has_stateful = has_stateful_acl(od);
e75451fe 2364
2d018f9b
GS
2365 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
2366 * default. A related rule at priority 1 is added below if there
2367 * are any stateful ACLs in this datapath. */
2368 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
2369 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
2370
2371 if (has_stateful) {
78aab811
JP
2372 /* Ingress and Egress ACL Table (Priority 1).
2373 *
2374 * By default, traffic is allowed. This is partially handled by
2375 * the Priority 0 ACL flows added earlier, but we also need to
2376 * commit IP flows. This is because, while the initiater's
2377 * direction may not have any stateful rules, the server's may
2378 * and then its return traffic would not have an associated
cc58e1f2
RB
2379 * conntrack entry and would return "+invalid".
2380 *
2381 * We use "ct_commit" for a connection that is not already known
2382 * by the connection tracker. Once a connection is committed,
2383 * subsequent packets will hit the flow at priority 0 that just
2384 * uses "next;"
2385 *
b73db61d 2386 * We also check for established connections that have ct_label.blocked
cc58e1f2
RB
2387 * set on them. That's a connection that was disallowed, but is
2388 * now allowed by policy again since it hit this default-allow flow.
b73db61d 2389 * We need to set ct_label.blocked=0 to let the connection continue,
cc58e1f2
RB
2390 * which will be done by ct_commit() in the "stateful" stage.
2391 * Subsequent packets will hit the flow at priority 0 that just
2392 * uses "next;". */
2393 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
b73db61d 2394 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
cc58e1f2
RB
2395 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2396 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
b73db61d 2397 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
cc58e1f2 2398 REGBIT_CONNTRACK_COMMIT" = 1; next;");
78aab811
JP
2399
2400 /* Ingress and Egress ACL Table (Priority 65535).
2401 *
cc58e1f2
RB
2402 * Always drop traffic that's in an invalid state. Also drop
2403 * reply direction packets for connections that have been marked
2404 * for deletion (bit 0 of ct_label is set).
2405 *
2406 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 2407 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
b73db61d 2408 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
cc58e1f2 2409 "drop;");
880fcd14 2410 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
b73db61d 2411 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
cc58e1f2 2412 "drop;");
78aab811
JP
2413
2414 /* Ingress and Egress ACL Table (Priority 65535).
2415 *
cc58e1f2
RB
2416 * Allow reply traffic that is part of an established
2417 * conntrack entry that has not been marked for deletion
2418 * (bit 0 of ct_label). We only match traffic in the
2419 * reply direction because we want traffic in the request
2420 * direction to hit the currently defined policy from ACLs.
2421 *
2422 * This is enforced at a higher priority than ACLs can be defined. */
880fcd14 2423 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2 2424 "ct.est && !ct.rel && !ct.new && !ct.inv "
b73db61d 2425 "&& ct.rpl && ct_label.blocked == 0",
78aab811 2426 "next;");
880fcd14 2427 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2 2428 "ct.est && !ct.rel && !ct.new && !ct.inv "
b73db61d 2429 "&& ct.rpl && ct_label.blocked == 0",
78aab811
JP
2430 "next;");
2431
2432 /* Ingress and Egress ACL Table (Priority 65535).
2433 *
cc58e1f2
RB
2434 * Allow traffic that is related to an existing conntrack entry that
2435 * has not been marked for deletion (bit 0 of ct_label).
2436 *
2437 * This is enforced at a higher priority than ACLs can be defined.
78aab811
JP
2438 *
2439 * NOTE: This does not support related data sessions (eg,
2440 * a dynamically negotiated FTP data channel), but will allow
2441 * related traffic such as an ICMP Port Unreachable through
2442 * that's generated from a non-listening UDP port. */
880fcd14 2443 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
cc58e1f2 2444 "!ct.est && ct.rel && !ct.new && !ct.inv "
b73db61d 2445 "&& ct_label.blocked == 0",
78aab811 2446 "next;");
880fcd14 2447 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
cc58e1f2 2448 "!ct.est && ct.rel && !ct.new && !ct.inv "
b73db61d 2449 "&& ct_label.blocked == 0",
78aab811 2450 "next;");
e75451fe
ZKL
2451
2452 /* Ingress and Egress ACL Table (Priority 65535).
2453 *
2454 * Not to do conntrack on ND packets. */
2455 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
2456 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
78aab811
JP
2457 }
2458
2459 /* Ingress or Egress ACL Table (Various priorities). */
9975d7be
BP
2460 for (size_t i = 0; i < od->nbs->n_acls; i++) {
2461 struct nbrec_acl *acl = od->nbs->acls[i];
78aab811 2462 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
880fcd14 2463 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
78aab811 2464
cc58e1f2
RB
2465 if (!strcmp(acl->action, "allow")
2466 || !strcmp(acl->action, "allow-related")) {
78aab811
JP
2467 /* If there are any stateful flows, we must even commit "allow"
2468 * actions. This is because, while the initiater's
2469 * direction may not have any stateful rules, the server's
2470 * may and then its return traffic would not have an
2471 * associated conntrack entry and would return "+invalid". */
cc58e1f2
RB
2472 if (!has_stateful) {
2473 ovn_lflow_add(lflows, od, stage,
2474 acl->priority + OVN_ACL_PRI_OFFSET,
2475 acl->match, "next;");
2476 } else {
2477 struct ds match = DS_EMPTY_INITIALIZER;
2478
2479 /* Commit the connection tracking entry if it's a new
2480 * connection that matches this ACL. After this commit,
2481 * the reply traffic is allowed by a flow we create at
2482 * priority 65535, defined earlier.
2483 *
2484 * It's also possible that a known connection was marked for
2485 * deletion after a policy was deleted, but the policy was
2486 * re-added while that connection is still known. We catch
b73db61d 2487 * that case here and un-set ct_label.blocked (which will be done
cc58e1f2
RB
2488 * by ct_commit in the "stateful" stage) to indicate that the
2489 * connection should be allowed to resume.
2490 */
2491 ds_put_format(&match, "((ct.new && !ct.est)"
2492 " || (!ct.new && ct.est && !ct.rpl "
b73db61d 2493 "&& ct_label.blocked == 1)) "
cc58e1f2
RB
2494 "&& (%s)", acl->match);
2495 ovn_lflow_add(lflows, od, stage,
2496 acl->priority + OVN_ACL_PRI_OFFSET,
2497 ds_cstr(&match),
2498 REGBIT_CONNTRACK_COMMIT" = 1; next;");
2499
2500 /* Match on traffic in the request direction for an established
2501 * connection tracking entry that has not been marked for
2502 * deletion. There is no need to commit here, so we can just
2503 * proceed to the next table. We use this to ensure that this
2504 * connection is still allowed by the currently defined
2505 * policy. */
2506 ds_clear(&match);
2507 ds_put_format(&match,
2508 "!ct.new && ct.est && !ct.rpl"
b73db61d 2509 " && ct_label.blocked == 0 && (%s)",
cc58e1f2
RB
2510 acl->match);
2511 ovn_lflow_add(lflows, od, stage,
2512 acl->priority + OVN_ACL_PRI_OFFSET,
2513 ds_cstr(&match), "next;");
2514
2515 ds_destroy(&match);
2516 }
2517 } else if (!strcmp(acl->action, "drop")
2518 || !strcmp(acl->action, "reject")) {
78aab811
JP
2519 struct ds match = DS_EMPTY_INITIALIZER;
2520
cc58e1f2
RB
2521 /* XXX Need to support "reject", treat it as "drop;" for now. */
2522 if (!strcmp(acl->action, "reject")) {
2523 VLOG_INFO("reject is not a supported action");
2524 }
78aab811 2525
cc58e1f2
RB
2526 /* The implementation of "drop" differs if stateful ACLs are in
2527 * use for this datapath. In that case, the actions differ
2528 * depending on whether the connection was previously committed
2529 * to the connection tracker with ct_commit. */
2530 if (has_stateful) {
2531 /* If the packet is not part of an established connection, then
2532 * we can simply drop it. */
2533 ds_put_format(&match,
b73db61d 2534 "(!ct.est || (ct.est && ct_label.blocked == 1)) "
cc58e1f2
RB
2535 "&& (%s)",
2536 acl->match);
2537 ovn_lflow_add(lflows, od, stage, acl->priority +
2538 OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");
2539
2540 /* For an existing connection without ct_label set, we've
2541 * encountered a policy change. ACLs previously allowed
2542 * this connection and we committed the connection tracking
2543 * entry. Current policy says that we should drop this
2544 * connection. First, we set bit 0 of ct_label to indicate
2545 * that this connection is set for deletion. By not
2546 * specifying "next;", we implicitly drop the packet after
2547 * updating conntrack state. We would normally defer
2548 * ct_commit() to the "stateful" stage, but since we're
2549 * dropping the packet, we go ahead and do it here. */
2550 ds_clear(&match);
2551 ds_put_format(&match,
b73db61d 2552 "ct.est && ct_label.blocked == 0 && (%s)",
cc58e1f2
RB
2553 acl->match);
2554 ovn_lflow_add(lflows, od, stage,
2555 acl->priority + OVN_ACL_PRI_OFFSET,
2556 ds_cstr(&match), "ct_commit(ct_label=1/1);");
2557
2558 ds_destroy(&match);
2559 } else {
2560 /* There are no stateful ACLs in use on this datapath,
2561 * so a "drop" ACL is simply the "drop" logical flow action
2562 * in all cases. */
2563 ovn_lflow_add(lflows, od, stage,
2564 acl->priority + OVN_ACL_PRI_OFFSET,
2565 acl->match, "drop;");
2566 }
78aab811
JP
2567 }
2568 }
281977f7
NS
2569
2570 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
2571 * logical ports of the datapath if the CMS has configured DHCPv4 options*/
052fa3ac
BP
2572 for (size_t i = 0; i < od->nbs->n_ports; i++) {
2573 if (od->nbs->ports[i]->dhcpv4_options) {
2574 const char *server_id = smap_get(
2575 &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
2576 const char *server_mac = smap_get(
2577 &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
2578 const char *lease_time = smap_get(
2579 &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
2580 const char *router = smap_get(
2581 &od->nbs->ports[i]->dhcpv4_options->options, "router");
2582 if (server_id && server_mac && lease_time && router) {
2583 struct ds match = DS_EMPTY_INITIALIZER;
2584 const char *actions =
2585 has_stateful ? "ct_commit; next;" : "next;";
2586 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
2587 "&& ip4.src == %s && udp && udp.src == 67 "
2588 "&& udp.dst == 68", od->nbs->ports[i]->name,
2589 server_mac, server_id);
2590 ovn_lflow_add(
2591 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
2592 actions);
75e82c17 2593 ds_destroy(&match);
281977f7 2594 }
052fa3ac 2595 }
33ac3c83 2596
052fa3ac
BP
2597 if (od->nbs->ports[i]->dhcpv6_options) {
2598 const char *server_mac = smap_get(
2599 &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
2600 struct eth_addr ea;
2601 if (server_mac && eth_addr_from_string(server_mac, &ea)) {
2602 /* Get the link local IP of the DHCPv6 server from the
2603 * server MAC. */
2604 struct in6_addr lla;
2605 in6_generate_lla(ea, &lla);
2606
2607 char server_ip[INET6_ADDRSTRLEN + 1];
2608 ipv6_string_mapped(server_ip, &lla);
2609
2610 struct ds match = DS_EMPTY_INITIALIZER;
2611 const char *actions = has_stateful ? "ct_commit; next;" :
2612 "next;";
2613 ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
2614 "&& ip6.src == %s && udp && udp.src == 547 "
2615 "&& udp.dst == 546", od->nbs->ports[i]->name,
2616 server_mac, server_ip);
2617 ovn_lflow_add(
2618 lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
2619 actions);
75e82c17 2620 ds_destroy(&match);
33ac3c83 2621 }
281977f7
NS
2622 }
2623 }
78aab811
JP
2624}
2625
1a03fc7d
BS
2626static void
2627build_qos(struct ovn_datapath *od, struct hmap *lflows) {
2628 ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;");
2629 ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;");
2630
2631 for (size_t i = 0; i < od->nbs->n_qos_rules; i++) {
2632 struct nbrec_qos *qos = od->nbs->qos_rules[i];
2633 bool ingress = !strcmp(qos->direction, "from-lport") ? true :false;
2634 enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK;
2635
2636 if (!strcmp(qos->key_action, "dscp")) {
2637 struct ds dscp_action = DS_EMPTY_INITIALIZER;
2638
2639 ds_put_format(&dscp_action, "ip.dscp = %d; next;",
2640 (uint8_t)qos->value_action);
2641 ovn_lflow_add(lflows, od, stage,
2642 qos->priority,
2643 qos->match, ds_cstr(&dscp_action));
2644 ds_destroy(&dscp_action);
2645 }
2646 }
2647}
2648
7a15be69
GS
2649static void
2650build_lb(struct ovn_datapath *od, struct hmap *lflows)
2651{
2652 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
2653 * default. */
2654 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
2655 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
2656
2657 if (od->nbs->load_balancer) {
2658 /* Ingress and Egress LB Table (Priority 65535).
2659 *
2660 * Send established traffic through conntrack for just NAT. */
2661 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
2662 "ct.est && !ct.rel && !ct.new && !ct.inv",
2663 REGBIT_CONNTRACK_NAT" = 1; next;");
2664 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
2665 "ct.est && !ct.rel && !ct.new && !ct.inv",
2666 REGBIT_CONNTRACK_NAT" = 1; next;");
2667 }
2668}
2669
fa313a8c
GS
/* Builds the ingress and egress "stateful" stage flows for logical switch
 * datapath 'od', adding them to 'lflows'.
 *
 * This stage commits allowed connections to conntrack, sends established
 * NAT traffic back through ct_lb, and installs per-VIP load-balancing
 * flows for every load balancer attached to the switch. */
static void
build_stateful(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Ingress and Egress stateful Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");

    /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
     * committed to conntrack.  We always set ct_label.blocked to 0 here as
     * any packet that makes it this far is part of a connection we
     * want to allow to continue. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");

    /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
     * through nat (without committing).
     *
     * REGBIT_CONNTRACK_COMMIT is set for new connections and
     * REGBIT_CONNTRACK_NAT is set for established connections. So they
     * don't overlap.
     */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");

    /* Load balancing rules for new connections get committed to conntrack
     * table.  So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
     * a higher priority rule for load balancing below also commits the
     * connection, so it is okay if we do not hit the above match on
     * REGBIT_CONNTRACK_COMMIT. */
    for (int i = 0; i < od->nbs->n_load_balancer; i++) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
        struct smap *vips = &lb->vips;
        struct smap_node *node;

        SMAP_FOR_EACH (node, vips) {
            uint16_t port = 0;

            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
            if (!ip_address) {
                /* Unparseable VIP key; skip this entry. */
                continue;
            }

            /* New connections in Ingress table.  'node->value' is the
             * backend list handed to ct_lb verbatim. */
            char *action = xasprintf("ct_lb(%s);", node->value);
            struct ds match = DS_EMPTY_INITIALIZER;
            ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
            if (port) {
                /* VIP included an L4 port: match protocol too (default is
                 * TCP unless the LB is explicitly "udp") at priority 120. */
                if (lb->protocol && !strcmp(lb->protocol, "udp")) {
                    ds_put_format(&match, " && udp && udp.dst == %d", port);
                } else {
                    ds_put_format(&match, " && tcp && tcp.dst == %d", port);
                }
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              120, ds_cstr(&match), action);
            } else {
                /* IP-only VIP, lower priority so port-specific VIPs win. */
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              110, ds_cstr(&match), action);
            }

            free(ip_address);
            ds_destroy(&match);
            free(action);
        }
    }
}
2742
/* Builds the complete logical flow pipeline for every logical switch in
 * 'datapaths', emitting flows into 'lflows' and multicast groups into
 * 'mcgroups'.  Covers admission control, port security (L2/IP/ND),
 * pre-ACL/ACL/QoS/LB/stateful stages (via helpers), the ARP/ND responder,
 * native DHCPv4/v6, and the destination (L2) lookup stages. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Scratch buffers reused (via ds_clear) across all the loops below;
     * destroyed once at the end of the function. */
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 through 9.  Egress tables 0 through 6. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            /* Not a logical switch datapath (e.g. a router); skip. */
            continue;
        }

        build_pre_acls(od, lflows);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     *  (priority 50).
     *  Ingress table 1: Ingress port security - IP (priority 90 and 80)
     *  Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            /* Not a logical switch port. */
            continue;
        }

        if (!lsp_is_enabled(op->nbsp)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        ds_clear(&match);
        ds_clear(&actions);
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        /* Optional QoS queue assignment configured on the SB port binding. */
        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
     * ports. (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!strcmp(op->nbsp->type, "localnet")) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    /* Ingress table 10: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router
         */
        if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* One ARP-reply flow per configured IPv4 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to an ARP request from the port that owns the
                 * address (otherwise a DHCP client that ARPs to check for a
                 * duplicate address will fail).  Instead, forward it the usual
                 * way.
                 *
                 * (Another alternative would be to simply drop the packet.  If
                 * everything is working as it is configured, then this would
                 * produce equivalent results, since no one should reply to the
                 * request.  But ARPing for one's own IP address is intended to
                 * detect situations where the network is not working as
                 * configured, so dropping the request would frustrate that
                 * intent.) */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }

            /* For ND solicitations, we need to listen for both the
             * unicast IPv6 address and its all-nodes multicast address,
             * but always respond with the unicast IPv6 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match,
                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);

                ds_clear(&actions);
                ds_put_format(&actions,
                        "nd_na { "
                        "eth.src = %s; "
                        "ip6.src = %s; "
                        "nd.target = %s; "
                        "nd.tll = %s; "
                        "outport = inport; "
                        "flags.loopback = 1; "
                        "output; "
                        "};",
                        op->lsp_addrs[i].ea_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ea_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to a solicitation from the port that owns the
                 * address (otherwise DAD detection will fail). */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    /* Ingress table 10: ARP/ND responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Logical switch ingress table 11 and 12: DHCP options and response
     * priority 100 flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
           continue;
        }

        if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
            /* Don't add the DHCP flows if the port is not enabled or if the
             * port is a router port. */
            continue;
        }

        if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
            /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
             */
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Only the first address with valid DHCPv4 options gets flows
             * (note the 'break' below). */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action)) {
                    /* NOTE: this local 'match' intentionally shadows the
                     * function-scope scratch 'match'. */
                    struct ds match = DS_EMPTY_INITIALIZER;
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
                     * put_dhcp_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }

            /* Same pattern for DHCPv6; first usable IPv6 address wins. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv6_action(
                        op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
                        &options_action, &response_action)) {
                    struct ds match = DS_EMPTY_INITIALIZER;
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s"
                        " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
                        " udp.dst == 547", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
                                  ds_cstr(&match), ds_cstr(&options_action));

                    /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
                     * put_dhcpv6_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
                                  ds_cstr(&match), ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }
        }
    }

    /* Ingress table 11 and 12: DHCP options and response, by default goto next.
     * (priority 0). */

    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
    }

    /* Ingress table 13: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            /* Addresses are owned by the logical port.
             * Ethernet address followed by zero or more IPv4
             * or IPv6 addresses (or both). */
            struct eth_addr mac;
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                /* Static MAC: forward matching unicast to this port. */
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                /* "unknown" address: deliver unmatched unicast here via the
                 * MC_UNKNOWN group (see priority-0 flow below). */
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                /* IPAM-managed address: use the MAC that was dynamically
                 * assigned, if one has been recorded yet. */
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress tables 6: Egress port security - IP (priority 0)
     * Egress table 7: Egress port security L2 - multicast/broadcast
     *   (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 6: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 7: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }

    ds_destroy(&match);
    ds_destroy(&actions);
}
eb00399e 3175
9975d7be
BP
3176static bool
3177lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
3178{
3179 return !lrport->enabled || *lrport->enabled;
3180}
3181
4685e523
JP
/* Returns a string of the IP address of the router port 'op' that
 * overlaps with 'ip_s'.  If one is not found, returns NULL.
 *
 * Address family (v4 vs v6) is inferred from 'ip_s': presence of a '.'
 * means IPv4.  A malformed 'ip_s' is logged (rate-limited) and NULL is
 * returned.
 *
 * The caller must not free the returned string. */
static const char *
find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
{
    bool is_ipv4 = strchr(ip_s, '.') ? true : false;

    if (is_ipv4) {
        ovs_be32 ip;

        if (!ip_parse(ip_s, &ip)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
            return NULL;
        }

        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
            const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];

            /* Subnet match: (network XOR ip) has no bits set within the
             * netmask. */
            if (!((na->network ^ ip) & na->mask)) {
                /* There should be only 1 interface that matches the
                 * supplied IP.  Otherwise, it's a configuration error,
                 * because subnets of a router's interfaces should NOT
                 * overlap. */
                return na->addr_s;
            }
        }
    } else {
        struct in6_addr ip6;

        if (!ipv6_parse(ip_s, &ip6)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s);
            return NULL;
        }

        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
            const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i];
            /* Same subnet test as above, done 128 bits wide. */
            struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6);
            struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask);

            if (ipv6_is_zero(&and_addr)) {
                /* There should be only 1 interface that matches the
                 * supplied IP.  Otherwise, it's a configuration error,
                 * because subnets of a router's interfaces should NOT
                 * overlap. */
                return na->addr_s;
            }
        }
    }

    return NULL;
}
3237
/* Adds a logical router routing flow to 'lflows' for router port 'op'.
 *
 * 'network_s'/'plen' give the destination prefix to match; 'lrp_addr_s' is
 * the port's own address loaded into (xx)reg1; 'gateway', if nonnull, is
 * the next-hop address loaded into (xx)reg0, otherwise the packet's own
 * destination address is used (directly attached network).
 *
 * Flow priority equals 'plen' so that longer prefixes win
 * (longest-prefix-match). */
static void
add_route(struct hmap *lflows, const struct ovn_port *op,
          const char *lrp_addr_s, const char *network_s, int plen,
          const char *gateway)
{
    bool is_ipv4 = strchr(network_s, '.') ? true : false;
    struct ds match = DS_EMPTY_INITIALIZER;

    /* IPv6 link-local addresses must be scoped to the local router port. */
    if (!is_ipv4) {
        struct in6_addr network;
        /* 'network_s' was produced by our own formatting code, so parsing
         * is expected to succeed; ovs_assert is always enabled in OVS. */
        ovs_assert(ipv6_parse(network_s, &network));
        if (in6_is_lla(&network)) {
            ds_put_format(&match, "inport == %s && ", op->json_key);
        }
    }
    ds_put_format(&match, "ip%s.dst == %s/%d", is_ipv4 ? "4" : "6",
                  network_s, plen);

    struct ds actions = DS_EMPTY_INITIALIZER;
    /* reg0 holds the next-hop; IPv6 uses the 128-bit xxreg0/xxreg1. */
    ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");

    if (gateway) {
        ds_put_cstr(&actions, gateway);
    } else {
        ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
    }
    ds_put_format(&actions, "; "
                  "%sreg1 = %s; "
                  "eth.src = %s; "
                  "outport = %s; "
                  "flags.loopback = 1; "
                  "next;",
                  is_ipv4 ? "" : "xx",
                  lrp_addr_s,
                  op->lrp_networks.ea_s,
                  op->json_key);

    /* The priority here is calculated to implement longest-prefix-match
     * routing. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_destroy(&match);
    ds_destroy(&actions);
}
3283
28dc3fe9
SR
3284static void
3285build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
3286 struct hmap *ports,
3287 const struct nbrec_logical_router_static_route *route)
3288{
6fdb7cd6 3289 ovs_be32 nexthop;
4685e523 3290 const char *lrp_addr_s;
6fdb7cd6
JP
3291 unsigned int plen;
3292 bool is_ipv4;
28dc3fe9 3293
6fdb7cd6
JP
3294 /* Verify that the next hop is an IP address with an all-ones mask. */
3295 char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
3296 if (!error) {
3297 if (plen != 32) {
3298 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3299 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3300 return;
3301 }
3302 is_ipv4 = true;
3303 } else {
28dc3fe9 3304 free(error);
6fdb7cd6
JP
3305
3306 struct in6_addr ip6;
3307 char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
3308 if (!error) {
3309 if (plen != 128) {
3310 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3311 VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
3312 return;
3313 }
3314 is_ipv4 = false;
3315 } else {
3316 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3317 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
3318 free(error);
3319 return;
3320 }
28dc3fe9
SR
3321 }
3322
6fdb7cd6
JP
3323 char *prefix_s;
3324 if (is_ipv4) {
3325 ovs_be32 prefix;
3326 /* Verify that ip prefix is a valid IPv4 address. */
3327 error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
3328 if (error) {
3329 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3330 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3331 route->ip_prefix);
3332 free(error);
3333 return;
3334 }
3335 prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
3336 } else {
3337 /* Verify that ip prefix is a valid IPv6 address. */
3338 struct in6_addr prefix;
3339 error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
3340 if (error) {
3341 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3342 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
3343 route->ip_prefix);
3344 free(error);
3345 return;
3346 }
3347 struct in6_addr mask = ipv6_create_mask(plen);
3348 struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
3349 prefix_s = xmalloc(INET6_ADDRSTRLEN);
3350 inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
28dc3fe9
SR
3351 }
3352
3353 /* Find the outgoing port. */
3354 struct ovn_port *out_port = NULL;
3355 if (route->output_port) {
3356 out_port = ovn_port_find(ports, route->output_port);
3357 if (!out_port) {
3358 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3359 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
3360 route->output_port, route->ip_prefix);
6fdb7cd6 3361 goto free_prefix_s;
28dc3fe9 3362 }
4685e523 3363 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
28dc3fe9
SR
3364 } else {
3365 /* output_port is not specified, find the
3366 * router port matching the next hop. */
3367 int i;
3368 for (i = 0; i < od->nbr->n_ports; i++) {
3369 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
3370 out_port = ovn_port_find(ports, lrp->name);
3371 if (!out_port) {
3372 /* This should not happen. */
3373 continue;
3374 }
3375
4685e523
JP
3376 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
3377 if (lrp_addr_s) {
28dc3fe9
SR
3378 break;
3379 }
3380 }
28dc3fe9
SR
3381 }
3382
4685e523
JP
3383 if (!lrp_addr_s) {
3384 /* There is no matched out port. */
3385 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3386 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
3387 route->ip_prefix, route->nexthop);
6fdb7cd6 3388 goto free_prefix_s;
4685e523
JP
3389 }
3390
6fdb7cd6
JP
3391 add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop);
3392
3393free_prefix_s:
c9bdf7bd 3394 free(prefix_s);
28dc3fe9
SR
3395}
3396
4685e523 3397static void
6fdb7cd6 3398op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
4685e523
JP
3399{
3400 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
3401 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
3402 return;
3403 }
3404
3405 ds_put_cstr(ds, "{");
3406 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3407 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
3408 if (add_bcast) {
3409 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
3410 }
3411 }
3412 ds_chomp(ds, ' ');
3413 ds_chomp(ds, ',');
3414 ds_put_cstr(ds, "}");
3415}
3416
6fdb7cd6
JP
3417static void
3418op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
3419{
3420 if (op->lrp_networks.n_ipv6_addrs == 1) {
3421 ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
3422 return;
3423 }
3424
3425 ds_put_cstr(ds, "{");
3426 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3427 ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
3428 }
3429 ds_chomp(ds, ' ');
3430 ds_chomp(ds, ',');
3431 ds_put_cstr(ds, "}");
3432}
3433
9975d7be
BP
3434static void
3435build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
3436 struct hmap *lflows)
3437{
3438 /* This flow table structure is documented in ovn-northd(8), so please
3439 * update ovn-northd.8.xml if you change anything. */
3440
09b39248
JP
3441 struct ds match = DS_EMPTY_INITIALIZER;
3442 struct ds actions = DS_EMPTY_INITIALIZER;
3443
9975d7be
BP
3444 /* Logical router ingress table 0: Admission control framework. */
3445 struct ovn_datapath *od;
3446 HMAP_FOR_EACH (od, key_node, datapaths) {
3447 if (!od->nbr) {
3448 continue;
3449 }
3450
3451 /* Logical VLANs not supported.
3452 * Broadcast/multicast source address is invalid. */
3453 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
3454 "vlan.present || eth.src[40]", "drop;");
3455 }
3456
3457 /* Logical router ingress table 0: match (priority 50). */
3458 struct ovn_port *op;
3459 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3460 if (!op->nbrp) {
9975d7be
BP
3461 continue;
3462 }
3463
0ee00741 3464 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
3465 /* Drop packets from disabled logical ports (since logical flow
3466 * tables are default-drop). */
3467 continue;
3468 }
3469
09b39248 3470 ds_clear(&match);
4685e523
JP
3471 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
3472 op->lrp_networks.ea_s, op->json_key);
9975d7be 3473 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
09b39248 3474 ds_cstr(&match), "next;");
9975d7be
BP
3475 }
3476
3477 /* Logical router ingress table 1: IP Input. */
78aab811 3478 HMAP_FOR_EACH (od, key_node, datapaths) {
9975d7be
BP
3479 if (!od->nbr) {
3480 continue;
3481 }
3482
3483 /* L3 admission control: drop multicast and broadcast source, localhost
3484 * source or destination, and zero network source or destination
3485 * (priority 100). */
3486 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
3487 "ip4.mcast || "
3488 "ip4.src == 255.255.255.255 || "
3489 "ip4.src == 127.0.0.0/8 || "
3490 "ip4.dst == 127.0.0.0/8 || "
3491 "ip4.src == 0.0.0.0/8 || "
3492 "ip4.dst == 0.0.0.0/8",
3493 "drop;");
3494
0bac7164
BP
3495 /* ARP reply handling. Use ARP replies to populate the logical
3496 * router's ARP table. */
3497 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
3498 "put_arp(inport, arp.spa, arp.sha);");
3499
9975d7be
BP
3500 /* Drop Ethernet local broadcast. By definition this traffic should
3501 * not be forwarded.*/
3502 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
3503 "eth.bcast", "drop;");
3504
9975d7be
BP
3505 /* TTL discard.
3506 *
3507 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
09b39248
JP
3508 ds_clear(&match);
3509 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
3510 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
3511 ds_cstr(&match), "drop;");
9975d7be 3512
c34a87b6
JP
3513 /* ND advertisement handling. Use advertisements to populate
3514 * the logical router's ARP/ND table. */
3515 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na",
3516 "put_nd(inport, nd.target, nd.tll);");
3517
3518 /* Lean from neighbor solicitations that were not directed at
3519 * us. (A priority-90 flow will respond to requests to us and
3520 * learn the sender's mac address. */
3521 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns",
3522 "put_nd(inport, ip6.src, nd.sll);");
3523
9975d7be
BP
3524 /* Pass other traffic not already handled to the next table for
3525 * routing. */
3526 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
78aab811
JP
3527 }
3528
6fdb7cd6 3529 /* Logical router ingress table 1: IP Input for IPv4. */
9975d7be 3530 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 3531 if (!op->nbrp) {
9975d7be
BP
3532 continue;
3533 }
3534
9975d7be 3535
6fdb7cd6
JP
3536 if (op->lrp_networks.n_ipv4_addrs) {
3537 /* L3 admission control: drop packets that originate from an
3538 * IPv4 address owned by the router or a broadcast address
3539 * known to the router (priority 100). */
3540 ds_clear(&match);
3541 ds_put_cstr(&match, "ip4.src == ");
3542 op_put_v4_networks(&match, op, true);
3543 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3544 ds_cstr(&match), "drop;");
3545
3546 /* ICMP echo reply. These flows reply to ICMP echo requests
3547 * received for the router's IP address. Since packets only
3548 * get here as part of the logical router datapath, the inport
3549 * (i.e. the incoming locally attached net) does not matter.
3550 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
3551 ds_clear(&match);
3552 ds_put_cstr(&match, "ip4.dst == ");
3553 op_put_v4_networks(&match, op, false);
3554 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
3555
3556 ds_clear(&actions);
3557 ds_put_format(&actions,
3558 "ip4.dst <-> ip4.src; "
3559 "ip.ttl = 255; "
3560 "icmp4.type = 0; "
bf143492 3561 "flags.loopback = 1; "
6fdb7cd6
JP
3562 "next; ");
3563 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3564 ds_cstr(&match), ds_cstr(&actions));
3565 }
dd7652e6 3566
9975d7be
BP
3567 /* ARP reply. These flows reply to ARP requests for the router's own
3568 * IP address. */
4685e523
JP
3569 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
3570 ds_clear(&match);
3571 ds_put_format(&match,
3572 "inport == %s && arp.tpa == %s && arp.op == 1",
3573 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
3574
3575 ds_clear(&actions);
3576 ds_put_format(&actions,
3577 "eth.dst = eth.src; "
3578 "eth.src = %s; "
3579 "arp.op = 2; /* ARP reply */ "
3580 "arp.tha = arp.sha; "
3581 "arp.sha = %s; "
3582 "arp.tpa = arp.spa; "
3583 "arp.spa = %s; "
3584 "outport = %s; "
bf143492 3585 "flags.loopback = 1; "
4685e523
JP
3586 "output;",
3587 op->lrp_networks.ea_s,
3588 op->lrp_networks.ea_s,
3589 op->lrp_networks.ipv4_addrs[i].addr_s,
3590 op->json_key);
3591 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3592 ds_cstr(&match), ds_cstr(&actions));
3593 }
9975d7be 3594
cc4583aa
GS
3595 /* A set to hold all load-balancer vips that need ARP responses. */
3596 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3597
3598 for (int i = 0; i < op->od->nbr->n_load_balancer; i++) {
3599 struct nbrec_load_balancer *lb = op->od->nbr->load_balancer[i];
3600 struct smap *vips = &lb->vips;
3601 struct smap_node *node;
3602
3603 SMAP_FOR_EACH (node, vips) {
3604 /* node->key contains IP:port or just IP. */
3605 char *ip_address = NULL;
3606 uint16_t port;
3607
3608 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3609 if (!ip_address) {
3610 continue;
3611 }
3612
3613 if (!sset_contains(&all_ips, ip_address)) {
3614 sset_add(&all_ips, ip_address);
3615 }
3616
3617 free(ip_address);
3618 }
3619 }
3620
3621 const char *ip_address;
3622 SSET_FOR_EACH(ip_address, &all_ips) {
3623 ovs_be32 ip;
3624 if (!ip_parse(ip_address, &ip) || !ip) {
3625 continue;
3626 }
3627
3628 ds_clear(&match);
3629 ds_put_format(&match,
3630 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3631 op->json_key, IP_ARGS(ip));
3632
3633 ds_clear(&actions);
3634 ds_put_format(&actions,
3635 "eth.dst = eth.src; "
3636 "eth.src = %s; "
3637 "arp.op = 2; /* ARP reply */ "
3638 "arp.tha = arp.sha; "
3639 "arp.sha = %s; "
3640 "arp.tpa = arp.spa; "
3641 "arp.spa = "IP_FMT"; "
3642 "outport = %s; "
3643 "flags.loopback = 1; "
3644 "output;",
3645 op->lrp_networks.ea_s,
3646 op->lrp_networks.ea_s,
3647 IP_ARGS(ip),
3648 op->json_key);
3649 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3650 ds_cstr(&match), ds_cstr(&actions));
3651 }
3652
3653 sset_destroy(&all_ips);
3654
dde5ea7b
GS
3655 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
3656 size_t n_snat_ips = 0;
de297547
GS
3657 for (int i = 0; i < op->od->nbr->n_nat; i++) {
3658 const struct nbrec_nat *nat;
3659
3660 nat = op->od->nbr->nat[i];
3661
de297547
GS
3662 ovs_be32 ip;
3663 if (!ip_parse(nat->external_ip, &ip) || !ip) {
3664 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
dde5ea7b 3665 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
de297547
GS
3666 "for router %s", nat->external_ip, op->key);
3667 continue;
3668 }
3669
dde5ea7b
GS
3670 if (!strcmp(nat->type, "snat")) {
3671 snat_ips[n_snat_ips++] = ip;
3672 continue;
3673 }
3674
3675 /* ARP handling for external IP addresses.
3676 *
3677 * DNAT IP addresses are external IP addresses that need ARP
3678 * handling. */
09b39248
JP
3679 ds_clear(&match);
3680 ds_put_format(&match,
3681 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
3682 op->json_key, IP_ARGS(ip));
4685e523 3683
09b39248
JP
3684 ds_clear(&actions);
3685 ds_put_format(&actions,
de297547 3686 "eth.dst = eth.src; "
4685e523 3687 "eth.src = %s; "
de297547
GS
3688 "arp.op = 2; /* ARP reply */ "
3689 "arp.tha = arp.sha; "
4685e523 3690 "arp.sha = %s; "
de297547
GS
3691 "arp.tpa = arp.spa; "
3692 "arp.spa = "IP_FMT"; "
3693 "outport = %s; "
bf143492 3694 "flags.loopback = 1; "
de297547 3695 "output;",
4685e523
JP
3696 op->lrp_networks.ea_s,
3697 op->lrp_networks.ea_s,
de297547
GS
3698 IP_ARGS(ip),
3699 op->json_key);
3700 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
09b39248 3701 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3702 }
3703
4685e523
JP
3704 ds_clear(&match);
3705 ds_put_cstr(&match, "ip4.dst == {");
3706 bool has_drop_ips = false;
3707 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
49da9ec0 3708 bool snat_ip_is_router_ip = false;
dde5ea7b
GS
3709 for (int j = 0; j < n_snat_ips; j++) {
3710 /* Packets to SNAT IPs should not be dropped. */
3711 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
49da9ec0
CSV
3712 snat_ip_is_router_ip = true;
3713 break;
4685e523 3714 }
4ef48e9d 3715 }
49da9ec0
CSV
3716 if (snat_ip_is_router_ip) {
3717 continue;
3718 }
4685e523
JP
3719 ds_put_format(&match, "%s, ",
3720 op->lrp_networks.ipv4_addrs[i].addr_s);
3721 has_drop_ips = true;
4ef48e9d 3722 }
4685e523
JP
3723 ds_chomp(&match, ' ');
3724 ds_chomp(&match, ',');
3725 ds_put_cstr(&match, "}");
4ef48e9d 3726
4685e523
JP
3727 if (has_drop_ips) {
3728 /* Drop IP traffic to this router. */
09b39248
JP
3729 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3730 ds_cstr(&match), "drop;");
4ef48e9d 3731 }
4685e523 3732
dde5ea7b 3733 free(snat_ips);
9975d7be
BP
3734 }
3735
6fdb7cd6
JP
3736 /* Logical router ingress table 1: IP Input for IPv6. */
3737 HMAP_FOR_EACH (op, key_node, ports) {
3738 if (!op->nbrp) {
3739 continue;
3740 }
3741
3742 if (op->lrp_networks.n_ipv6_addrs) {
3743 /* L3 admission control: drop packets that originate from an
3744 * IPv6 address owned by the router (priority 100). */
3745 ds_clear(&match);
3746 ds_put_cstr(&match, "ip6.src == ");
3747 op_put_v6_networks(&match, op);
3748 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
3749 ds_cstr(&match), "drop;");
3750
3751 /* ICMPv6 echo reply. These flows reply to echo requests
3752 * received for the router's IP address. */
3753 ds_clear(&match);
3754 ds_put_cstr(&match, "ip6.dst == ");
3755 op_put_v6_networks(&match, op);
3756 ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0");
3757
3758 ds_clear(&actions);
3759 ds_put_cstr(&actions,
3760 "ip6.dst <-> ip6.src; "
3761 "ip.ttl = 255; "
3762 "icmp6.type = 129; "
bf143492 3763 "flags.loopback = 1; "
6fdb7cd6
JP
3764 "next; ");
3765 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3766 ds_cstr(&match), ds_cstr(&actions));
3767
3768 /* Drop IPv6 traffic to this router. */
3769 ds_clear(&match);
3770 ds_put_cstr(&match, "ip6.dst == ");
3771 op_put_v6_networks(&match, op);
3772 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
3773 ds_cstr(&match), "drop;");
3774 }
3775
3776 /* ND reply. These flows reply to ND solicitations for the
3777 * router's own IP address. */
3778 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
3779 ds_clear(&match);
3780 ds_put_format(&match,
3781 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
3782 "&& nd.target == %s",
3783 op->json_key,
3784 op->lrp_networks.ipv6_addrs[i].addr_s,
3785 op->lrp_networks.ipv6_addrs[i].sn_addr_s,
3786 op->lrp_networks.ipv6_addrs[i].addr_s);
3787
3788 ds_clear(&actions);
3789 ds_put_format(&actions,
c34a87b6 3790 "put_nd(inport, ip6.src, nd.sll); "
6fdb7cd6
JP
3791 "nd_na { "
3792 "eth.src = %s; "
3793 "ip6.src = %s; "
3794 "nd.target = %s; "
3795 "nd.tll = %s; "
3796 "outport = inport; "
bf143492 3797 "flags.loopback = 1; "
6fdb7cd6
JP
3798 "output; "
3799 "};",
3800 op->lrp_networks.ea_s,
3801 op->lrp_networks.ipv6_addrs[i].addr_s,
3802 op->lrp_networks.ipv6_addrs[i].addr_s,
3803 op->lrp_networks.ea_s);
3804 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
3805 ds_cstr(&match), ds_cstr(&actions));
3806 }
3807 }
3808
cc4583aa 3809 /* NAT, Defrag and load balancing in Gateway routers. */
de297547
GS
3810 HMAP_FOR_EACH (od, key_node, datapaths) {
3811 if (!od->nbr) {
3812 continue;
3813 }
3814
3815 /* Packets are allowed by default. */
cc4583aa 3816 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
de297547
GS
3817 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
3818 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
3819 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
3820
cc4583aa
GS
3821 /* NAT rules, packet defrag and load balancing are only valid on
3822 * Gateway routers. */
de297547
GS
3823 if (!smap_get(&od->nbr->options, "chassis")) {
3824 continue;
3825 }
3826
cc4583aa
GS
3827 /* A set to hold all ips that need defragmentation and tracking. */
3828 struct sset all_ips = SSET_INITIALIZER(&all_ips);
3829
3830 for (int i = 0; i < od->nbr->n_load_balancer; i++) {
3831 struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
3832 struct smap *vips = &lb->vips;
3833 struct smap_node *node;
3834
3835 SMAP_FOR_EACH (node, vips) {
3836 uint16_t port = 0;
3837
3838 /* node->key contains IP:port or just IP. */
3839 char *ip_address = NULL;
3840 ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
3841 if (!ip_address) {
3842 continue;
3843 }
3844
3845 if (!sset_contains(&all_ips, ip_address)) {
3846 sset_add(&all_ips, ip_address);
3847 }
3848
3849 /* Higher priority rules are added in DNAT table to match on
3850 * ct.new which in-turn have group id as an action for load
3851 * balancing. */
3852 ds_clear(&actions);
3853 ds_put_format(&actions, "ct_lb(%s);", node->value);
3854
3855 ds_clear(&match);
3856 ds_put_format(&match, "ct.new && ip && ip4.dst == %s",
3857 ip_address);
3858 free(ip_address);
3859
3860 if (port) {
3861 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
546f1ff3
GS
3862 ds_put_format(&match, " && udp && udp.dst == %d",
3863 port);
cc4583aa 3864 } else {
546f1ff3
GS
3865 ds_put_format(&match, " && tcp && tcp.dst == %d",
3866 port);
cc4583aa
GS
3867 }
3868 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
3869 120, ds_cstr(&match), ds_cstr(&actions));
3870 } else {
3871 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT,
3872 110, ds_cstr(&match), ds_cstr(&actions));
3873 }
3874 }
3875 }
3876
3877 /* If there are any load balancing rules, we should send the
3878 * packet to conntrack for defragmentation and tracking. This helps
3879 * with two things.
3880 *
3881 * 1. With tracking, we can send only new connections to pick a
3882 * DNAT ip address from a group.
3883 * 2. If there are L4 ports in load balancing rules, we need the
3884 * defragmentation to match on L4 ports. */
3885 const char *ip_address;
3886 SSET_FOR_EACH(ip_address, &all_ips) {
3887 ds_clear(&match);
3888 ds_put_format(&match, "ip && ip4.dst == %s", ip_address);
3889 ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG,
3890 100, ds_cstr(&match), "ct_next;");
3891 }
3892
3893 sset_destroy(&all_ips);
3894
de297547
GS
3895 for (int i = 0; i < od->nbr->n_nat; i++) {
3896 const struct nbrec_nat *nat;
3897
3898 nat = od->nbr->nat[i];
3899
3900 ovs_be32 ip, mask;
3901
3902 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
3903 if (error || mask != OVS_BE32_MAX) {
3904 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
3905 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
3906 nat->external_ip);
3907 free(error);
3908 continue;
3909 }
3910
3911 /* Check the validity of nat->logical_ip. 'logical_ip' can
3912 * be a subnet when the type is "snat". */
3913 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
3914 if (!strcmp(nat->type, "snat")) {
3915 if (error) {
3916 static struct vlog_rate_limit rl =
3917 VLOG_RATE_LIMIT_INIT(5, 1);
3918 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
3919 "in router "UUID_FMT"",
3920 nat->logical_ip, UUID_ARGS(&od->key));
3921 free(error);
3922 continue;
3923 }
3924 } else {
3925 if (error || mask != OVS_BE32_MAX) {
3926 static struct vlog_rate_limit rl =
3927 VLOG_RATE_LIMIT_INIT(5, 1);
3928 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
3929 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
3930 free(error);
3931 continue;
3932 }
3933 }
3934
de297547
GS
3935 /* Ingress UNSNAT table: It is for already established connections'
3936 * reverse traffic. i.e., SNAT has already been done in egress
3937 * pipeline and now the packet has entered the ingress pipeline as
3938 * part of a reply. We undo the SNAT here.
3939 *
3940 * Undoing SNAT has to happen before DNAT processing. This is
3941 * because when the packet was DNATed in ingress pipeline, it did
3942 * not know about the possibility of eventual additional SNAT in
3943 * egress pipeline. */
3944 if (!strcmp(nat->type, "snat")
3945 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
3946 ds_clear(&match);
3947 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
de297547 3948 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
09b39248 3949 ds_cstr(&match), "ct_snat; next;");
de297547
GS
3950 }
3951
3952 /* Ingress DNAT table: Packets enter the pipeline with destination
3953 * IP address that needs to be DNATted from a external IP address
3954 * to a logical IP address. */
3955 if (!strcmp(nat->type, "dnat")
3956 || !strcmp(nat->type, "dnat_and_snat")) {
3957 /* Packet when it goes from the initiator to destination.
3958 * We need to zero the inport because the router can
3959 * send the packet back through the same interface. */
09b39248
JP
3960 ds_clear(&match);
3961 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
3962 ds_clear(&actions);
bf143492 3963 ds_put_format(&actions,"flags.loopback = 1; ct_dnat(%s);",
09b39248 3964 nat->logical_ip);
de297547 3965 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
09b39248 3966 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3967 }
3968
3969 /* Egress SNAT table: Packets enter the egress pipeline with
3970 * source ip address that needs to be SNATted to a external ip
3971 * address. */
3972 if (!strcmp(nat->type, "snat")
3973 || !strcmp(nat->type, "dnat_and_snat")) {
09b39248
JP
3974 ds_clear(&match);
3975 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
3976 ds_clear(&actions);
3977 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
de297547
GS
3978
3979 /* The priority here is calculated such that the
3980 * nat->logical_ip with the longest mask gets a higher
3981 * priority. */
3982 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
09b39248
JP
3983 count_1bits(ntohl(mask)) + 1,
3984 ds_cstr(&match), ds_cstr(&actions));
de297547
GS
3985 }
3986 }
3987
3988 /* Re-circulate every packet through the DNAT zone.
cc4583aa 3989 * This helps with three things.
de297547
GS
3990 *
3991 * 1. Any packet that needs to be unDNATed in the reverse
3992 * direction gets unDNATed. Ideally this could be done in
3993 * the egress pipeline. But since the gateway router
3994 * does not have any feature that depends on the source
3995 * ip address being external IP address for IP routing,
3996 * we can do it here, saving a future re-circulation.
3997 *
cc4583aa
GS
3998 * 2. Established load-balanced connections automatically get
3999 * DNATed.
4000 *
4001 * 3. Any packet that was sent through SNAT zone in the
de297547
GS
4002 * previous table automatically gets re-circulated to get
4003 * back the new destination IP address that is needed for
4004 * routing in the openflow pipeline. */
4005 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
bf143492 4006 "ip", "flags.loopback = 1; ct_dnat;");
de297547
GS
4007 }
4008
94300e09 4009 /* Logical router ingress table 4: IP Routing.
9975d7be
BP
4010 *
4011 * A packet that arrives at this table is an IP packet that should be
6fdb7cd6
JP
4012 * routed to the address in 'ip[46].dst'. This table sets outport to
4013 * the correct output port, eth.src to the output port's MAC
4014 * address, and '[xx]reg0' to the next-hop IP address (leaving
4015 * 'ip[46].dst', the packet’s final destination, unchanged), and
4016 * advances to the next table for ARP/ND resolution. */
9975d7be 4017 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4018 if (!op->nbrp) {
9975d7be
BP
4019 continue;
4020 }
4021
4685e523
JP
4022 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
4023 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
4024 op->lrp_networks.ipv4_addrs[i].network_s,
4025 op->lrp_networks.ipv4_addrs[i].plen, NULL);
4026 }
6fdb7cd6
JP
4027
4028 for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
4029 add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
4030 op->lrp_networks.ipv6_addrs[i].network_s,
4031 op->lrp_networks.ipv6_addrs[i].plen, NULL);
4032 }
9975d7be 4033 }
4685e523 4034
6fdb7cd6 4035 /* Convert the static routes to flows. */
9975d7be
BP
4036 HMAP_FOR_EACH (od, key_node, datapaths) {
4037 if (!od->nbr) {
4038 continue;
4039 }
4040
28dc3fe9
SR
4041 for (int i = 0; i < od->nbr->n_static_routes; i++) {
4042 const struct nbrec_logical_router_static_route *route;
4043
4044 route = od->nbr->static_routes[i];
4045 build_static_route_flow(lflows, od, ports, route);
4046 }
9975d7be 4047 }
6fdb7cd6 4048
9975d7be
BP
4049 /* XXX destination unreachable */
4050
94300e09 4051 /* Local router ingress table 5: ARP Resolution.
9975d7be
BP
4052 *
4053 * Any packet that reaches this table is an IP packet whose next-hop IP
4054 * address is in reg0. (ip4.dst is the final destination.) This table
4055 * resolves the IP address in reg0 into an output port in outport and an
4056 * Ethernet address in eth.dst. */
4057 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4058 if (op->nbrp) {
6fdb7cd6
JP
4059 /* This is a logical router port. If next-hop IP address in
4060 * '[xx]reg0' matches IP address of this router port, then
4061 * the packet is intended to eventually be sent to this
4062 * logical port. Set the destination mac address using this
4063 * port's mac address.
509afdc3
GS
4064 *
4065 * The packet is still in peer's logical pipeline. So the match
4066 * should be on peer's outport. */
6fdb7cd6
JP
4067 if (op->peer && op->nbrp->peer) {
4068 if (op->lrp_networks.n_ipv4_addrs) {
4069 ds_clear(&match);
4070 ds_put_format(&match, "outport == %s && reg0 == ",
4071 op->peer->json_key);
4072 op_put_v4_networks(&match, op, false);
4073
4074 ds_clear(&actions);
4075 ds_put_format(&actions, "eth.dst = %s; next;",
4076 op->lrp_networks.ea_s);
4077 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4078 100, ds_cstr(&match), ds_cstr(&actions));
4079 }
4685e523 4080
6fdb7cd6
JP
4081 if (op->lrp_networks.n_ipv6_addrs) {
4082 ds_clear(&match);
4083 ds_put_format(&match, "outport == %s && xxreg0 == ",
4084 op->peer->json_key);
4085 op_put_v6_networks(&match, op);
4086
4087 ds_clear(&actions);
4088 ds_put_format(&actions, "eth.dst = %s; next;",
4089 op->lrp_networks.ea_s);
4090 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
4091 100, ds_cstr(&match), ds_cstr(&actions));
4092 }
509afdc3 4093 }
0ee00741 4094 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
4095 /* This is a logical switch port that backs a VM or a container.
4096 * Extract its addresses. For each of the address, go through all
4097 * the router ports attached to the switch (to which this port
4098 * connects) and if the address in question is reachable from the
6fdb7cd6 4099 * router port, add an ARP/ND entry in that router's pipeline. */
75cf9d2b 4100
e93b43d6 4101 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
4685e523 4102 const char *ea_s = op->lsp_addrs[i].ea_s;
e93b43d6 4103 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
4685e523 4104 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
e93b43d6 4105 for (size_t k = 0; k < op->od->n_router_ports; k++) {
80f408f4
JP
4106 /* Get the Logical_Router_Port that the
4107 * Logical_Switch_Port is connected to, as
4108 * 'peer'. */
86e98048 4109 const char *peer_name = smap_get(
0ee00741 4110 &op->od->router_ports[k]->nbsp->options,
86e98048
BP
4111 "router-port");
4112 if (!peer_name) {
4113 continue;
4114 }
4115
e93b43d6 4116 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 4117 if (!peer || !peer->nbrp) {
86e98048
BP
4118 continue;
4119 }
4120
4685e523 4121 if (!find_lrp_member_ip(peer, ip_s)) {
86e98048
BP
4122 continue;
4123 }
4124
09b39248 4125 ds_clear(&match);
e93b43d6 4126 ds_put_format(&match, "outport == %s && reg0 == %s",
4685e523
JP
4127 peer->json_key, ip_s);
4128
09b39248 4129 ds_clear(&actions);
4685e523 4130 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
86e98048 4131 ovn_lflow_add(lflows, peer->od,
09b39248
JP
4132 S_ROUTER_IN_ARP_RESOLVE, 100,
4133 ds_cstr(&match), ds_cstr(&actions));
86e98048 4134 }
9975d7be 4135 }
6fdb7cd6
JP
4136
4137 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
4138 const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
4139 for (size_t k = 0; k < op->od->n_router_ports; k++) {
4140 /* Get the Logical_Router_Port that the
4141 * Logical_Switch_Port is connected to, as
4142 * 'peer'. */
4143 const char *peer_name = smap_get(
4144 &op->od->router_ports[k]->nbsp->options,
4145 "router-port");
4146 if (!peer_name) {
4147 continue;
4148 }
4149
4150 struct ovn_port *peer = ovn_port_find(ports, peer_name);
4151 if (!peer || !peer->nbrp) {
4152 continue;
4153 }
4154
4155 if (!find_lrp_member_ip(peer, ip_s)) {
4156 continue;
4157 }
4158
4159 ds_clear(&match);
4160 ds_put_format(&match, "outport == %s && xxreg0 == %s",
4161 peer->json_key, ip_s);
4162
4163 ds_clear(&actions);
4164 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
4165 ovn_lflow_add(lflows, peer->od,
4166 S_ROUTER_IN_ARP_RESOLVE, 100,
4167 ds_cstr(&match), ds_cstr(&actions));
4168 }
4169 }
9975d7be 4170 }
0ee00741 4171 } else if (!strcmp(op->nbsp->type, "router")) {
75cf9d2b
GS
4172 /* This is a logical switch port that connects to a router. */
4173
4174 /* The peer of this switch port is the router port for which
4175 * we need to add logical flows such that it can resolve
4176 * ARP entries for all the other router ports connected to
4177 * the switch in question. */
4178
0ee00741 4179 const char *peer_name = smap_get(&op->nbsp->options,
75cf9d2b
GS
4180 "router-port");
4181 if (!peer_name) {
4182 continue;
4183 }
4184
4185 struct ovn_port *peer = ovn_port_find(ports, peer_name);
0ee00741 4186 if (!peer || !peer->nbrp) {
75cf9d2b
GS
4187 continue;
4188 }
4189
4685e523 4190 for (size_t i = 0; i < op->od->n_router_ports; i++) {
75cf9d2b 4191 const char *router_port_name = smap_get(
0ee00741 4192 &op->od->router_ports[i]->nbsp->options,
75cf9d2b
GS
4193 "router-port");
4194 struct ovn_port *router_port = ovn_port_find(ports,
4195 router_port_name);
0ee00741 4196 if (!router_port || !router_port->nbrp) {
75cf9d2b
GS
4197 continue;
4198 }
4199
4200 /* Skip the router port under consideration. */
4201 if (router_port == peer) {
4202 continue;
4203 }
4204
6fdb7cd6
JP
4205 if (router_port->lrp_networks.n_ipv4_addrs) {
4206 ds_clear(&match);
4207 ds_put_format(&match, "outport == %s && reg0 == ",
4208 peer->json_key);
4209 op_put_v4_networks(&match, router_port, false);
4210
4211 ds_clear(&actions);
4212 ds_put_format(&actions, "eth.dst = %s; next;",
4213 router_port->lrp_networks.ea_s);
4214 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
4215 100, ds_cstr(&match), ds_cstr(&actions));
4216 }
4685e523 4217
6fdb7cd6
JP
4218 if (router_port->lrp_networks.n_ipv6_addrs) {
4219 ds_clear(&match);
4220 ds_put_format(&match, "outport == %s && xxreg0 == ",
4221 peer->json_key);
4222 op_put_v6_networks(&match, router_port);
4223
4224 ds_clear(&actions);
4225 ds_put_format(&actions, "eth.dst = %s; next;",
4226 router_port->lrp_networks.ea_s);
4227 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
4228 100, ds_cstr(&match), ds_cstr(&actions));
4229 }
75cf9d2b 4230 }
9975d7be
BP
4231 }
4232 }
75cf9d2b 4233
0bac7164
BP
4234 HMAP_FOR_EACH (od, key_node, datapaths) {
4235 if (!od->nbr) {
4236 continue;
4237 }
4238
c34a87b6
JP
4239 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
4240 "get_arp(outport, reg0); next;");
4241
4242 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
4243 "get_nd(outport, xxreg0); next;");
0bac7164
BP
4244 }
4245
94300e09 4246 /* Local router ingress table 6: ARP request.
0bac7164
BP
4247 *
4248 * In the common case where the Ethernet destination has been resolved,
94300e09
JP
4249 * this table outputs the packet (priority 0). Otherwise, it composes
4250 * and sends an ARP request (priority 100). */
0bac7164
BP
4251 HMAP_FOR_EACH (od, key_node, datapaths) {
4252 if (!od->nbr) {
4253 continue;
4254 }
4255
4256 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
4257 "eth.dst == 00:00:00:00:00:00",
4258 "arp { "
4259 "eth.dst = ff:ff:ff:ff:ff:ff; "
4260 "arp.spa = reg1; "
47021598 4261 "arp.tpa = reg0; "
0bac7164
BP
4262 "arp.op = 1; " /* ARP request */
4263 "output; "
4264 "};");
4265 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
4266 }
9975d7be 4267
de297547 4268 /* Logical router egress table 1: Delivery (priority 100).
9975d7be
BP
4269 *
4270 * Priority 100 rules deliver packets to enabled logical ports. */
4271 HMAP_FOR_EACH (op, key_node, ports) {
0ee00741 4272 if (!op->nbrp) {
9975d7be
BP
4273 continue;
4274 }
4275
0ee00741 4276 if (!lrport_is_enabled(op->nbrp)) {
9975d7be
BP
4277 /* Drop packets to disabled logical ports (since logical flow
4278 * tables are default-drop). */
4279 continue;
4280 }
4281
09b39248
JP
4282 ds_clear(&match);
4283 ds_put_format(&match, "outport == %s", op->json_key);
9975d7be 4284 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
09b39248 4285 ds_cstr(&match), "output;");
9975d7be 4286 }
09b39248
JP
4287
4288 ds_destroy(&match);
4289 ds_destroy(&actions);
9975d7be
BP
4290}
4291
4292/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
4293 * constructing their contents based on the OVN_NB database. */
4294static void
4295build_lflows(struct northd_context *ctx, struct hmap *datapaths,
4296 struct hmap *ports)
4297{
4298 struct hmap lflows = HMAP_INITIALIZER(&lflows);
4299 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
4300
4301 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
4302 build_lrouter_flows(datapaths, ports, &lflows);
4303
5868eb24
BP
4304 /* Push changes to the Logical_Flow table to database. */
4305 const struct sbrec_logical_flow *sbflow, *next_sbflow;
4306 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
4307 struct ovn_datapath *od
4308 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
4309 if (!od) {
4310 sbrec_logical_flow_delete(sbflow);
4311 continue;
eb00399e 4312 }
eb00399e 4313
9975d7be 4314 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
880fcd14
BP
4315 enum ovn_pipeline pipeline
4316 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
5868eb24 4317 struct ovn_lflow *lflow = ovn_lflow_find(
880fcd14
BP
4318 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
4319 sbflow->priority, sbflow->match, sbflow->actions);
5868eb24
BP
4320 if (lflow) {
4321 ovn_lflow_destroy(&lflows, lflow);
4322 } else {
4323 sbrec_logical_flow_delete(sbflow);
4edcdcf4
RB
4324 }
4325 }
5868eb24
BP
4326 struct ovn_lflow *lflow, *next_lflow;
4327 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
880fcd14
BP
4328 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
4329 uint8_t table = ovn_stage_get_table(lflow->stage);
4330
5868eb24
BP
4331 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
4332 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
9975d7be
BP
4333 sbrec_logical_flow_set_pipeline(
4334 sbflow, pipeline == P_IN ? "ingress" : "egress");
880fcd14 4335 sbrec_logical_flow_set_table_id(sbflow, table);
5868eb24
BP
4336 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
4337 sbrec_logical_flow_set_match(sbflow, lflow->match);
4338 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
091e3af9 4339
880fcd14
BP
4340 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
4341 ovn_stage_to_str(lflow->stage));
aaf881c6 4342 sbrec_logical_flow_set_external_ids(sbflow, &ids);
091e3af9 4343
5868eb24 4344 ovn_lflow_destroy(&lflows, lflow);
eb00399e 4345 }
5868eb24
BP
4346 hmap_destroy(&lflows);
4347
4348 /* Push changes to the Multicast_Group table to database. */
4349 const struct sbrec_multicast_group *sbmc, *next_sbmc;
4350 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
4351 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
4352 sbmc->datapath);
4353 if (!od) {
4354 sbrec_multicast_group_delete(sbmc);
4355 continue;
4356 }
eb00399e 4357
5868eb24
BP
4358 struct multicast_group group = { .name = sbmc->name,
4359 .key = sbmc->tunnel_key };
4360 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
4361 if (mc) {
4362 ovn_multicast_update_sbrec(mc, sbmc);
4363 ovn_multicast_destroy(&mcgroups, mc);
4364 } else {
4365 sbrec_multicast_group_delete(sbmc);
4366 }
4367 }
4368 struct ovn_multicast *mc, *next_mc;
4369 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
4370 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
4371 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
4372 sbrec_multicast_group_set_name(sbmc, mc->group->name);
4373 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
4374 ovn_multicast_update_sbrec(mc, sbmc);
4375 ovn_multicast_destroy(&mcgroups, mc);
4edcdcf4 4376 }
5868eb24 4377 hmap_destroy(&mcgroups);
4edcdcf4 4378}
ea382567
RB
4379
4380/* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
4381 * We always update OVN_Southbound to match the current data in
4382 * OVN_Northbound, so that the address sets used in Logical_Flows in
4383 * OVN_Southbound is checked against the proper set.*/
4384static void
4385sync_address_sets(struct northd_context *ctx)
4386{
4387 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
4388
4389 const struct sbrec_address_set *sb_address_set;
4390 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
4391 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
4392 }
4393
4394 const struct nbrec_address_set *nb_address_set;
4395 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
4396 sb_address_set = shash_find_and_delete(&sb_address_sets,
4397 nb_address_set->name);
4398 if (!sb_address_set) {
4399 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
4400 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
4401 }
4402
4403 sbrec_address_set_set_addresses(sb_address_set,
4404 /* "char **" is not compatible with "const char **" */
4405 (const char **) nb_address_set->addresses,
4406 nb_address_set->n_addresses);
4407 }
4408
4409 struct shash_node *node, *next;
4410 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
4411 sbrec_address_set_delete(node->data);
4412 shash_delete(&sb_address_sets, node);
4413 }
4414 shash_destroy(&sb_address_sets);
4415}
5868eb24 4416\f
4edcdcf4 4417static void
fa183acc 4418ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
4edcdcf4 4419{
b511690b 4420 if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
331e7aef
NS
4421 return;
4422 }
5868eb24
BP
4423 struct hmap datapaths, ports;
4424 build_datapaths(ctx, &datapaths);
4425 build_ports(ctx, &datapaths, &ports);
b511690b 4426 build_ipam(&datapaths, &ports);
5868eb24
BP
4427 build_lflows(ctx, &datapaths, &ports);
4428
ea382567
RB
4429 sync_address_sets(ctx);
4430
5868eb24
BP
4431 struct ovn_datapath *dp, *next_dp;
4432 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
4433 ovn_datapath_destroy(&datapaths, dp);
4434 }
4435 hmap_destroy(&datapaths);
4436
4437 struct ovn_port *port, *next_port;
4438 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
4439 ovn_port_destroy(&ports, port);
4440 }
4441 hmap_destroy(&ports);
fa183acc
BP
4442
4443 /* Copy nb_cfg from northbound to southbound database.
4444 *
4445 * Also set up to update sb_cfg once our southbound transaction commits. */
4446 const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
14338f22
GS
4447 if (!nb) {
4448 nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
4449 }
fa183acc 4450 const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
14338f22
GS
4451 if (!sb) {
4452 sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
fa183acc 4453 }
14338f22
GS
4454 sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
4455 sb_loop->next_cfg = nb->nb_cfg;
8639f9be
ND
4456
4457 cleanup_macam(&macam);
ac0630a2
RB
4458}
4459
fa183acc
BP
4460/* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
4461 * this column is not empty, it means we need to set the corresponding logical
4462 * port as 'up' in the northbound DB. */
ac0630a2 4463static void
fa183acc 4464update_logical_port_status(struct northd_context *ctx)
ac0630a2 4465{
fc3113bc 4466 struct hmap lports_hmap;
5868eb24 4467 const struct sbrec_port_binding *sb;
0ee00741 4468 const struct nbrec_logical_switch_port *nbsp;
fc3113bc
RB
4469
4470 struct lport_hash_node {
4471 struct hmap_node node;
0ee00741 4472 const struct nbrec_logical_switch_port *nbsp;
4ec3d7c7 4473 } *hash_node;
f93818dd 4474
fc3113bc 4475 hmap_init(&lports_hmap);
f93818dd 4476
0ee00741 4477 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
fc3113bc 4478 hash_node = xzalloc(sizeof *hash_node);
0ee00741
HK
4479 hash_node->nbsp = nbsp;
4480 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
fc3113bc
RB
4481 }
4482
5868eb24 4483 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
0ee00741 4484 nbsp = NULL;
fc3113bc 4485 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
5868eb24
BP
4486 hash_string(sb->logical_port, 0),
4487 &lports_hmap) {
0ee00741
HK
4488 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
4489 nbsp = hash_node->nbsp;
fc3113bc
RB
4490 break;
4491 }
f93818dd
RB
4492 }
4493
0ee00741 4494 if (!nbsp) {
dcda6e0d 4495 /* The logical port doesn't exist for this port binding. This can
2e2762d4 4496 * happen under normal circumstances when ovn-northd hasn't gotten
dcda6e0d 4497 * around to pruning the Port_Binding yet. */
f93818dd
RB
4498 continue;
4499 }
4500
0ee00741 4501 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
f93818dd 4502 bool up = true;
0ee00741
HK
4503 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
4504 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
f93818dd 4505 bool up = false;
0ee00741 4506 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
f93818dd
RB
4507 }
4508 }
fc3113bc 4509
4ec3d7c7 4510 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
fc3113bc
RB
4511 free(hash_node);
4512 }
4513 hmap_destroy(&lports_hmap);
ac0630a2 4514}
45f98d4c 4515
281977f7
NS
/* DHCPv4 options that ovn-northd supports.  Each macro (from
 * ovn/lib/ovn-dhcp.h) expands to a dhcp_opts_map initializer whose name,
 * code, and type fields are synced into the southbound DHCP_Options table
 * by check_and_add_supported_dhcp_opts_to_sb_db(). */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
4542
33ac3c83
NS
/* DHCPv6 options that ovn-northd supports; synced into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db(). */
static struct dhcp_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
4549
281977f7
NS
4550static void
4551check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
4552{
4553 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
4554 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
4555 sizeof(supported_dhcp_opts[0])); i++) {
4556 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
4557 dhcp_opt_hash(supported_dhcp_opts[i].name));
4558 }
4559
4560 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
4561 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4562 struct dhcp_opts_map *dhcp_opt =
4563 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
4564 if (dhcp_opt) {
4565 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
4566 } else {
4567 sbrec_dhcp_options_delete(opt_row);
4568 }
4569 }
4570
4571 struct dhcp_opts_map *opt;
4572 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
4573 struct sbrec_dhcp_options *sbrec_dhcp_option =
4574 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
4575 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
4576 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
4577 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
4578 }
4579
4580 hmap_destroy(&dhcp_opts_to_add);
4581}
4582
33ac3c83
NS
4583static void
4584check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx)
4585{
4586 struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add);
4587 for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) /
4588 sizeof(supported_dhcpv6_opts[0])); i++) {
4589 hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node,
4590 dhcp_opt_hash(supported_dhcpv6_opts[i].name));
4591 }
4592
4593 const struct sbrec_dhcpv6_options *opt_row, *opt_row_next;
4594 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
4595 struct dhcp_opts_map *dhcp_opt =
4596 dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name);
4597 if (dhcp_opt) {
4598 hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node);
4599 } else {
4600 sbrec_dhcpv6_options_delete(opt_row);
4601 }
4602 }
4603
4604 struct dhcp_opts_map *opt;
4605 HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) {
4606 struct sbrec_dhcpv6_options *sbrec_dhcpv6_option =
4607 sbrec_dhcpv6_options_insert(ctx->ovnsb_txn);
4608 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name);
4609 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code);
4610 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type);
4611 }
4612
4613 hmap_destroy(&dhcpv6_opts_to_add);
4614}
4615
fa183acc
BP
4616/* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
4617static void
4618update_northbound_cfg(struct northd_context *ctx,
4619 struct ovsdb_idl_loop *sb_loop)
4620{
4621 /* Update northbound sb_cfg if appropriate. */
4622 const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
4623 int64_t sb_cfg = sb_loop->cur_cfg;
4624 if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
4625 nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
4626 }
4627
4628 /* Update northbound hv_cfg if appropriate. */
4629 if (nbg) {
4630 /* Find minimum nb_cfg among all chassis. */
4631 const struct sbrec_chassis *chassis;
4632 int64_t hv_cfg = nbg->nb_cfg;
4633 SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
4634 if (chassis->nb_cfg < hv_cfg) {
4635 hv_cfg = chassis->nb_cfg;
4636 }
4637 }
4638
4639 /* Update hv_cfg. */
4640 if (nbg->hv_cfg != hv_cfg) {
4641 nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
4642 }
4643 }
4644}
4645
4646/* Handle a fairly small set of changes in the southbound database. */
4647static void
4648ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
4649{
4650 if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
4651 return;
4652 }
4653
4654 update_logical_port_status(ctx);
4655 update_northbound_cfg(ctx, sb_loop);
4656}
4657\f
ac0630a2
RB
/* Parses ovn-northd's command-line options, filling in the global
 * 'ovnsb_db' and 'ovnnb_db' connection strings (falling back to the
 * defaults when not given).  Exits the process for --help, --options,
 * and --version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        /* Option-value enums contributed by the daemon and vlog modules. */
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    /* Derive the getopt short-option string from the table above. */
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Shared handlers for daemon, logging, and SSL options. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the standard database locations when unspecified. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
4726
5868eb24
BP
/* Registers 'column' with 'idl' for replication but suppresses change
 * alerts for it: ovn-northd writes these columns itself and does not need
 * to be woken up when they change. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
4734
ac0630a2
RB
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    /* Set up the unixctl server so "ovn-appctl exit" can stop the loop. */
    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    nbrec_init();
    sbrec_init();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg and hv_cfg are written by ovn-northd itself, so changes to
     * them need not wake us up. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db.  Each table
     * and column used is registered explicitly below; columns that ovn-northd
     * only writes are added without change alerts. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    /* 'chassis' changes drive update_logical_port_status(), so alerts for it
     * stay enabled. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* ovsdb_idl_loop_run() processes pending updates and opens a
         * transaction (possibly NULL if one cannot be opened yet). */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        if (ctx.ovnsb_txn) {
            /* Keep the southbound DHCP option catalogs in sync. */
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Don't sleep in poll_block(); finish this iteration and exit. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        /* Windows service control may also request shutdown. */
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9
AW
4869
4870static void
4871ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
4872 const char *argv[] OVS_UNUSED, void *exiting_)
4873{
4874 bool *exiting = exiting_;
4875 *exiting = true;
4876
4877 unixctl_command_reply(conn, NULL);
4878}