2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "openvswitch/dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
33 #include "poll-loop.h"
36 #include "stream-ssl.h"
40 #include "openvswitch/vlog.h"
42 VLOG_DEFINE_THIS_MODULE(ovn_northd
);
44 static unixctl_cb_func ovn_northd_exit
;
46 struct northd_context
{
47 struct ovsdb_idl
*ovnnb_idl
;
48 struct ovsdb_idl
*ovnsb_idl
;
49 struct ovsdb_idl_txn
*ovnnb_txn
;
50 struct ovsdb_idl_txn
*ovnsb_txn
;
53 static const char *ovnnb_db
;
54 static const char *ovnsb_db
;
56 static const char *default_nb_db(void);
57 static const char *default_sb_db(void);
59 /* Pipeline stages. */
61 /* The two pipelines in an OVN logical flow table. */
63 P_IN
, /* Ingress pipeline. */
64 P_OUT
/* Egress pipeline. */
67 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
68 enum ovn_datapath_type
{
69 DP_SWITCH
, /* OVN logical switch. */
70 DP_ROUTER
/* OVN logical router. */
73 /* Returns an "enum ovn_stage" built from the arguments.
75 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
76 * functions can't be used in enums or switch cases.) */
77 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
78 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
80 /* A stage within an OVN logical switch or router.
82 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
83 * or router, whether the stage is part of the ingress or egress pipeline, and
84 * the table within that pipeline. The first three components are combined to
85 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
86 * S_ROUTER_OUT_DELIVERY. */
88 #define PIPELINE_STAGES \
89 /* Logical switch ingress stages. */ \
90 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
91 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
92 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
93 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
94 PIPELINE_STAGE(SWITCH, IN, ACL, 4, "ls_in_acl") \
95 PIPELINE_STAGE(SWITCH, IN, ARP_RSP, 5, "ls_in_arp_rsp") \
96 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 6, "ls_in_l2_lkup") \
98 /* Logical switch egress stages. */ \
99 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
100 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
101 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 2, "ls_out_port_sec_ip") \
102 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 3, "ls_out_port_sec_l2") \
104 /* Logical router ingress stages. */ \
105 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
106 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
107 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
108 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 3, "lr_in_arp_resolve") \
109 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 4, "lr_in_arp_request") \
111 /* Logical router egress stages. */ \
112 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")
114 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
115 S_##DP_TYPE##_##PIPELINE##_##STAGE \
116 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
118 #undef PIPELINE_STAGE
121 /* Due to various hard-coded priorities need to implement ACLs, the
122 * northbound database supports a smaller range of ACL priorities than
123 * are available to logical flows. This value is added to an ACL
124 * priority to determine the ACL's logical flow priority. */
125 #define OVN_ACL_PRI_OFFSET 1000
127 /* Returns an "enum ovn_stage" built from the arguments. */
128 static enum ovn_stage
129 ovn_stage_build(enum ovn_datapath_type dp_type
, enum ovn_pipeline pipeline
,
132 return OVN_STAGE_BUILD(dp_type
, pipeline
, table
);
135 /* Returns the pipeline to which 'stage' belongs. */
136 static enum ovn_pipeline
137 ovn_stage_get_pipeline(enum ovn_stage stage
)
139 return (stage
>> 8) & 1;
142 /* Returns the table to which 'stage' belongs. */
144 ovn_stage_get_table(enum ovn_stage stage
)
149 /* Returns a string name for 'stage'. */
151 ovn_stage_to_str(enum ovn_stage stage
)
154 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
155 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
157 #undef PIPELINE_STAGE
158 default: return "<unknown>";
166 %s: OVN northbound management daemon\n\
167 usage: %s [OPTIONS]\n\
170 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
172 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
174 -h, --help display this help message\n\
175 -o, --options list available options\n\
176 -V, --version display version information\n\
177 ", program_name
, program_name
, default_nb_db(), default_sb_db());
180 stream_usage("database", true, true, false);
184 struct hmap_node hmap_node
;
189 destroy_tnlids(struct hmap
*tnlids
)
191 struct tnlid_node
*node
, *next
;
192 HMAP_FOR_EACH_SAFE (node
, next
, hmap_node
, tnlids
) {
193 hmap_remove(tnlids
, &node
->hmap_node
);
196 hmap_destroy(tnlids
);
200 add_tnlid(struct hmap
*set
, uint32_t tnlid
)
202 struct tnlid_node
*node
= xmalloc(sizeof *node
);
203 hmap_insert(set
, &node
->hmap_node
, hash_int(tnlid
, 0));
208 tnlid_in_use(const struct hmap
*set
, uint32_t tnlid
)
210 const struct tnlid_node
*node
;
211 HMAP_FOR_EACH_IN_BUCKET (node
, hmap_node
, hash_int(tnlid
, 0), set
) {
212 if (node
->tnlid
== tnlid
) {
220 allocate_tnlid(struct hmap
*set
, const char *name
, uint32_t max
,
223 for (uint32_t tnlid
= *hint
+ 1; tnlid
!= *hint
;
224 tnlid
= tnlid
+ 1 <= max
? tnlid
+ 1 : 1) {
225 if (!tnlid_in_use(set
, tnlid
)) {
226 add_tnlid(set
, tnlid
);
232 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
233 VLOG_WARN_RL(&rl
, "all %s tunnel ids exhausted", name
);
237 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
238 * sb->external_ids:logical-switch. */
239 struct ovn_datapath
{
240 struct hmap_node key_node
; /* Index on 'key'. */
241 struct uuid key
; /* (nbs/nbr)->header_.uuid. */
243 const struct nbrec_logical_switch
*nbs
; /* May be NULL. */
244 const struct nbrec_logical_router
*nbr
; /* May be NULL. */
245 const struct sbrec_datapath_binding
*sb
; /* May be NULL. */
247 struct ovs_list list
; /* In list of similar records. */
249 /* Logical router data (digested from nbr). */
250 const struct ovn_port
*gateway_port
;
253 /* Logical switch data. */
254 struct ovn_port
**router_ports
;
255 size_t n_router_ports
;
257 struct hmap port_tnlids
;
258 uint32_t port_key_hint
;
263 static struct ovn_datapath
*
264 ovn_datapath_create(struct hmap
*datapaths
, const struct uuid
*key
,
265 const struct nbrec_logical_switch
*nbs
,
266 const struct nbrec_logical_router
*nbr
,
267 const struct sbrec_datapath_binding
*sb
)
269 struct ovn_datapath
*od
= xzalloc(sizeof *od
);
274 hmap_init(&od
->port_tnlids
);
275 od
->port_key_hint
= 0;
276 hmap_insert(datapaths
, &od
->key_node
, uuid_hash(&od
->key
));
281 ovn_datapath_destroy(struct hmap
*datapaths
, struct ovn_datapath
*od
)
284 /* Don't remove od->list. It is used within build_datapaths() as a
285 * private list and once we've exited that function it is not safe to
287 hmap_remove(datapaths
, &od
->key_node
);
288 destroy_tnlids(&od
->port_tnlids
);
289 free(od
->router_ports
);
294 static struct ovn_datapath
*
295 ovn_datapath_find(struct hmap
*datapaths
, const struct uuid
*uuid
)
297 struct ovn_datapath
*od
;
299 HMAP_FOR_EACH_WITH_HASH (od
, key_node
, uuid_hash(uuid
), datapaths
) {
300 if (uuid_equals(uuid
, &od
->key
)) {
307 static struct ovn_datapath
*
308 ovn_datapath_from_sbrec(struct hmap
*datapaths
,
309 const struct sbrec_datapath_binding
*sb
)
313 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
314 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
317 return ovn_datapath_find(datapaths
, &key
);
321 lrouter_is_enabled(const struct nbrec_logical_router
*lrouter
)
323 return !lrouter
->enabled
|| *lrouter
->enabled
;
327 join_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
,
328 struct ovs_list
*sb_only
, struct ovs_list
*nb_only
,
329 struct ovs_list
*both
)
331 hmap_init(datapaths
);
332 ovs_list_init(sb_only
);
333 ovs_list_init(nb_only
);
336 const struct sbrec_datapath_binding
*sb
, *sb_next
;
337 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb
, sb_next
, ctx
->ovnsb_idl
) {
339 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
340 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
341 ovsdb_idl_txn_add_comment(
343 "deleting Datapath_Binding "UUID_FMT
" that lacks "
344 "external-ids:logical-switch and "
345 "external-ids:logical-router",
346 UUID_ARGS(&sb
->header_
.uuid
));
347 sbrec_datapath_binding_delete(sb
);
351 if (ovn_datapath_find(datapaths
, &key
)) {
352 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
354 &rl
, "deleting Datapath_Binding "UUID_FMT
" with "
355 "duplicate external-ids:logical-switch/router "UUID_FMT
,
356 UUID_ARGS(&sb
->header_
.uuid
), UUID_ARGS(&key
));
357 sbrec_datapath_binding_delete(sb
);
361 struct ovn_datapath
*od
= ovn_datapath_create(datapaths
, &key
,
363 ovs_list_push_back(sb_only
, &od
->list
);
366 const struct nbrec_logical_switch
*nbs
;
367 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs
, ctx
->ovnnb_idl
) {
368 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
372 ovs_list_remove(&od
->list
);
373 ovs_list_push_back(both
, &od
->list
);
375 od
= ovn_datapath_create(datapaths
, &nbs
->header_
.uuid
,
377 ovs_list_push_back(nb_only
, &od
->list
);
381 const struct nbrec_logical_router
*nbr
;
382 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr
, ctx
->ovnnb_idl
) {
383 if (!lrouter_is_enabled(nbr
)) {
387 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
392 ovs_list_remove(&od
->list
);
393 ovs_list_push_back(both
, &od
->list
);
396 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
398 "duplicate UUID "UUID_FMT
" in OVN_Northbound",
399 UUID_ARGS(&nbr
->header_
.uuid
));
403 od
= ovn_datapath_create(datapaths
, &nbr
->header_
.uuid
,
405 ovs_list_push_back(nb_only
, &od
->list
);
409 if (nbr
->default_gw
) {
411 if (!ip_parse(nbr
->default_gw
, &ip
) || !ip
) {
412 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
413 VLOG_WARN_RL(&rl
, "bad 'gateway' %s", nbr
->default_gw
);
419 /* Set the gateway port to NULL. If there is a gateway, it will get
420 * filled in as we go through the ports later. */
421 od
->gateway_port
= NULL
;
/* Allocates a new, previously unused datapath tunnel key from the 24-bit
 * key space, recording it in 'dp_tnlids'.  Returns 0 if the space is
 * exhausted. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    /* Function-static hint lets consecutive allocations resume where the
     * previous search stopped. */
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;
    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
432 /* Updates the southbound Datapath_Binding table so that it contains the
433 * logical switches and routers specified by the northbound database.
435 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
436 * switch and router. */
438 build_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
)
440 struct ovs_list sb_only
, nb_only
, both
;
442 join_datapaths(ctx
, datapaths
, &sb_only
, &nb_only
, &both
);
444 if (!ovs_list_is_empty(&nb_only
)) {
445 /* First index the in-use datapath tunnel IDs. */
446 struct hmap dp_tnlids
= HMAP_INITIALIZER(&dp_tnlids
);
447 struct ovn_datapath
*od
;
448 LIST_FOR_EACH (od
, list
, &both
) {
449 add_tnlid(&dp_tnlids
, od
->sb
->tunnel_key
);
452 /* Add southbound record for each unmatched northbound record. */
453 LIST_FOR_EACH (od
, list
, &nb_only
) {
454 uint16_t tunnel_key
= ovn_datapath_allocate_key(&dp_tnlids
);
459 od
->sb
= sbrec_datapath_binding_insert(ctx
->ovnsb_txn
);
461 char uuid_s
[UUID_LEN
+ 1];
462 sprintf(uuid_s
, UUID_FMT
, UUID_ARGS(&od
->key
));
463 const char *key
= od
->nbs
? "logical-switch" : "logical-router";
464 const struct smap id
= SMAP_CONST1(&id
, key
, uuid_s
);
465 sbrec_datapath_binding_set_external_ids(od
->sb
, &id
);
467 sbrec_datapath_binding_set_tunnel_key(od
->sb
, tunnel_key
);
469 destroy_tnlids(&dp_tnlids
);
472 /* Delete southbound records without northbound matches. */
473 struct ovn_datapath
*od
, *next
;
474 LIST_FOR_EACH_SAFE (od
, next
, list
, &sb_only
) {
475 ovs_list_remove(&od
->list
);
476 sbrec_datapath_binding_delete(od
->sb
);
477 ovn_datapath_destroy(datapaths
, od
);
482 struct hmap_node key_node
; /* Index on 'key'. */
483 char *key
; /* nbs->name, nbr->name, sb->logical_port. */
484 char *json_key
; /* 'key', quoted for use in JSON. */
486 const struct nbrec_logical_port
*nbs
; /* May be NULL. */
487 const struct nbrec_logical_router_port
*nbr
; /* May be NULL. */
488 const struct sbrec_port_binding
*sb
; /* May be NULL. */
490 /* Logical router port data. */
491 ovs_be32 ip
, mask
; /* 192.168.10.123/24. */
492 ovs_be32 network
; /* 192.168.10.0. */
493 ovs_be32 bcast
; /* 192.168.10.255. */
495 struct ovn_port
*peer
;
497 struct ovn_datapath
*od
;
499 struct ovs_list list
; /* In list of similar records. */
502 static struct ovn_port
*
503 ovn_port_create(struct hmap
*ports
, const char *key
,
504 const struct nbrec_logical_port
*nbs
,
505 const struct nbrec_logical_router_port
*nbr
,
506 const struct sbrec_port_binding
*sb
)
508 struct ovn_port
*op
= xzalloc(sizeof *op
);
510 struct ds json_key
= DS_EMPTY_INITIALIZER
;
511 json_string_escape(key
, &json_key
);
512 op
->json_key
= ds_steal_cstr(&json_key
);
514 op
->key
= xstrdup(key
);
518 hmap_insert(ports
, &op
->key_node
, hash_string(op
->key
, 0));
523 ovn_port_destroy(struct hmap
*ports
, struct ovn_port
*port
)
526 /* Don't remove port->list. It is used within build_ports() as a
527 * private list and once we've exited that function it is not safe to
529 hmap_remove(ports
, &port
->key_node
);
530 free(port
->json_key
);
536 static struct ovn_port
*
537 ovn_port_find(struct hmap
*ports
, const char *name
)
541 HMAP_FOR_EACH_WITH_HASH (op
, key_node
, hash_string(name
, 0), ports
) {
542 if (!strcmp(op
->key
, name
)) {
550 ovn_port_allocate_key(struct ovn_datapath
*od
)
552 return allocate_tnlid(&od
->port_tnlids
, "port",
553 (1u << 15) - 1, &od
->port_key_hint
);
557 join_logical_ports(struct northd_context
*ctx
,
558 struct hmap
*datapaths
, struct hmap
*ports
,
559 struct ovs_list
*sb_only
, struct ovs_list
*nb_only
,
560 struct ovs_list
*both
)
563 ovs_list_init(sb_only
);
564 ovs_list_init(nb_only
);
567 const struct sbrec_port_binding
*sb
;
568 SBREC_PORT_BINDING_FOR_EACH (sb
, ctx
->ovnsb_idl
) {
569 struct ovn_port
*op
= ovn_port_create(ports
, sb
->logical_port
,
571 ovs_list_push_back(sb_only
, &op
->list
);
574 struct ovn_datapath
*od
;
575 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
577 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
578 const struct nbrec_logical_port
*nbs
= od
->nbs
->ports
[i
];
579 struct ovn_port
*op
= ovn_port_find(ports
, nbs
->name
);
581 if (op
->nbs
|| op
->nbr
) {
582 static struct vlog_rate_limit rl
583 = VLOG_RATE_LIMIT_INIT(5, 1);
584 VLOG_WARN_RL(&rl
, "duplicate logical port %s",
589 ovs_list_remove(&op
->list
);
590 ovs_list_push_back(both
, &op
->list
);
592 op
= ovn_port_create(ports
, nbs
->name
, nbs
, NULL
, NULL
);
593 ovs_list_push_back(nb_only
, &op
->list
);
599 for (size_t i
= 0; i
< od
->nbr
->n_ports
; i
++) {
600 const struct nbrec_logical_router_port
*nbr
604 if (!eth_addr_from_string(nbr
->mac
, &mac
)) {
605 static struct vlog_rate_limit rl
606 = VLOG_RATE_LIMIT_INIT(5, 1);
607 VLOG_WARN_RL(&rl
, "bad 'mac' %s", nbr
->mac
);
612 char *error
= ip_parse_masked(nbr
->network
, &ip
, &mask
);
613 if (error
|| mask
== OVS_BE32_MAX
|| !ip_is_cidr(mask
)) {
614 static struct vlog_rate_limit rl
615 = VLOG_RATE_LIMIT_INIT(5, 1);
616 VLOG_WARN_RL(&rl
, "bad 'network' %s", nbr
->network
);
621 struct ovn_port
*op
= ovn_port_find(ports
, nbr
->name
);
623 if (op
->nbs
|| op
->nbr
) {
624 static struct vlog_rate_limit rl
625 = VLOG_RATE_LIMIT_INIT(5, 1);
626 VLOG_WARN_RL(&rl
, "duplicate logical router port %s",
631 ovs_list_remove(&op
->list
);
632 ovs_list_push_back(both
, &op
->list
);
634 op
= ovn_port_create(ports
, nbr
->name
, NULL
, nbr
, NULL
);
635 ovs_list_push_back(nb_only
, &op
->list
);
640 op
->network
= ip
& mask
;
641 op
->bcast
= ip
| ~mask
;
646 /* If 'od' has a gateway and 'op' routes to it... */
647 if (od
->gateway
&& !((op
->network
^ od
->gateway
) & op
->mask
)) {
648 /* ...and if 'op' is a longer match than the current
650 const struct ovn_port
*gw
= od
->gateway_port
;
651 int len
= gw
? ip_count_cidr_bits(gw
->mask
) : 0;
652 if (ip_count_cidr_bits(op
->mask
) > len
) {
653 /* ...then it's the default gateway port. */
654 od
->gateway_port
= op
;
661 /* Connect logical router ports, and logical switch ports of type "router",
664 HMAP_FOR_EACH (op
, key_node
, ports
) {
665 if (op
->nbs
&& !strcmp(op
->nbs
->type
, "router")) {
666 const char *peer_name
= smap_get(&op
->nbs
->options
, "router-port");
671 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
672 if (!peer
|| !peer
->nbr
) {
678 op
->od
->router_ports
= xrealloc(
679 op
->od
->router_ports
,
680 sizeof *op
->od
->router_ports
* (op
->od
->n_router_ports
+ 1));
681 op
->od
->router_ports
[op
->od
->n_router_ports
++] = op
;
682 } else if (op
->nbr
&& op
->nbr
->peer
) {
683 op
->peer
= ovn_port_find(ports
, op
->nbr
->peer
);
689 ovn_port_update_sbrec(const struct ovn_port
*op
)
691 sbrec_port_binding_set_datapath(op
->sb
, op
->od
->sb
);
693 sbrec_port_binding_set_type(op
->sb
, "patch");
695 const char *peer
= op
->peer
? op
->peer
->key
: "<error>";
696 const struct smap ids
= SMAP_CONST1(&ids
, "peer", peer
);
697 sbrec_port_binding_set_options(op
->sb
, &ids
);
699 sbrec_port_binding_set_parent_port(op
->sb
, NULL
);
700 sbrec_port_binding_set_tag(op
->sb
, NULL
, 0);
701 sbrec_port_binding_set_mac(op
->sb
, NULL
, 0);
703 if (strcmp(op
->nbs
->type
, "router")) {
704 sbrec_port_binding_set_type(op
->sb
, op
->nbs
->type
);
705 sbrec_port_binding_set_options(op
->sb
, &op
->nbs
->options
);
707 sbrec_port_binding_set_type(op
->sb
, "patch");
709 const char *router_port
= smap_get(&op
->nbs
->options
,
712 router_port
= "<error>";
714 const struct smap ids
= SMAP_CONST1(&ids
, "peer", router_port
);
715 sbrec_port_binding_set_options(op
->sb
, &ids
);
717 sbrec_port_binding_set_parent_port(op
->sb
, op
->nbs
->parent_name
);
718 sbrec_port_binding_set_tag(op
->sb
, op
->nbs
->tag
, op
->nbs
->n_tag
);
719 sbrec_port_binding_set_mac(op
->sb
, (const char **) op
->nbs
->addresses
,
720 op
->nbs
->n_addresses
);
724 /* Updates the southbound Port_Binding table so that it contains the logical
725 * ports specified by the northbound database.
727 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
728 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
731 build_ports(struct northd_context
*ctx
, struct hmap
*datapaths
,
734 struct ovs_list sb_only
, nb_only
, both
;
736 join_logical_ports(ctx
, datapaths
, ports
, &sb_only
, &nb_only
, &both
);
738 /* For logical ports that are in both databases, update the southbound
739 * record based on northbound data. Also index the in-use tunnel_keys. */
740 struct ovn_port
*op
, *next
;
741 LIST_FOR_EACH_SAFE (op
, next
, list
, &both
) {
742 ovn_port_update_sbrec(op
);
744 add_tnlid(&op
->od
->port_tnlids
, op
->sb
->tunnel_key
);
745 if (op
->sb
->tunnel_key
> op
->od
->port_key_hint
) {
746 op
->od
->port_key_hint
= op
->sb
->tunnel_key
;
750 /* Add southbound record for each unmatched northbound record. */
751 LIST_FOR_EACH_SAFE (op
, next
, list
, &nb_only
) {
752 uint16_t tunnel_key
= ovn_port_allocate_key(op
->od
);
757 op
->sb
= sbrec_port_binding_insert(ctx
->ovnsb_txn
);
758 ovn_port_update_sbrec(op
);
760 sbrec_port_binding_set_logical_port(op
->sb
, op
->key
);
761 sbrec_port_binding_set_tunnel_key(op
->sb
, tunnel_key
);
764 /* Delete southbound records without northbound matches. */
765 LIST_FOR_EACH_SAFE(op
, next
, list
, &sb_only
) {
766 ovs_list_remove(&op
->list
);
767 sbrec_port_binding_delete(op
->sb
);
768 ovn_port_destroy(ports
, op
);
772 #define OVN_MIN_MULTICAST 32768
773 #define OVN_MAX_MULTICAST 65535
775 struct multicast_group
{
777 uint16_t key
; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
780 #define MC_FLOOD "_MC_flood"
781 static const struct multicast_group mc_flood
= { MC_FLOOD
, 65535 };
783 #define MC_UNKNOWN "_MC_unknown"
784 static const struct multicast_group mc_unknown
= { MC_UNKNOWN
, 65534 };
787 multicast_group_equal(const struct multicast_group
*a
,
788 const struct multicast_group
*b
)
790 return !strcmp(a
->name
, b
->name
) && a
->key
== b
->key
;
793 /* Multicast group entry. */
794 struct ovn_multicast
{
795 struct hmap_node hmap_node
; /* Index on 'datapath' and 'key'. */
796 struct ovn_datapath
*datapath
;
797 const struct multicast_group
*group
;
799 struct ovn_port
**ports
;
800 size_t n_ports
, allocated_ports
;
804 ovn_multicast_hash(const struct ovn_datapath
*datapath
,
805 const struct multicast_group
*group
)
807 return hash_pointer(datapath
, group
->key
);
810 static struct ovn_multicast
*
811 ovn_multicast_find(struct hmap
*mcgroups
, struct ovn_datapath
*datapath
,
812 const struct multicast_group
*group
)
814 struct ovn_multicast
*mc
;
816 HMAP_FOR_EACH_WITH_HASH (mc
, hmap_node
,
817 ovn_multicast_hash(datapath
, group
), mcgroups
) {
818 if (mc
->datapath
== datapath
819 && multicast_group_equal(mc
->group
, group
)) {
827 ovn_multicast_add(struct hmap
*mcgroups
, const struct multicast_group
*group
,
828 struct ovn_port
*port
)
830 struct ovn_datapath
*od
= port
->od
;
831 struct ovn_multicast
*mc
= ovn_multicast_find(mcgroups
, od
, group
);
833 mc
= xmalloc(sizeof *mc
);
834 hmap_insert(mcgroups
, &mc
->hmap_node
, ovn_multicast_hash(od
, group
));
838 mc
->allocated_ports
= 4;
839 mc
->ports
= xmalloc(mc
->allocated_ports
* sizeof *mc
->ports
);
841 if (mc
->n_ports
>= mc
->allocated_ports
) {
842 mc
->ports
= x2nrealloc(mc
->ports
, &mc
->allocated_ports
,
845 mc
->ports
[mc
->n_ports
++] = port
;
849 ovn_multicast_destroy(struct hmap
*mcgroups
, struct ovn_multicast
*mc
)
852 hmap_remove(mcgroups
, &mc
->hmap_node
);
859 ovn_multicast_update_sbrec(const struct ovn_multicast
*mc
,
860 const struct sbrec_multicast_group
*sb
)
862 struct sbrec_port_binding
**ports
= xmalloc(mc
->n_ports
* sizeof *ports
);
863 for (size_t i
= 0; i
< mc
->n_ports
; i
++) {
864 ports
[i
] = CONST_CAST(struct sbrec_port_binding
*, mc
->ports
[i
]->sb
);
866 sbrec_multicast_group_set_ports(sb
, ports
, mc
->n_ports
);
870 /* Logical flow generation.
872 * This code generates the Logical_Flow table in the southbound database, as a
873 * function of most of the northbound database.
877 struct hmap_node hmap_node
;
879 struct ovn_datapath
*od
;
880 enum ovn_stage stage
;
887 ovn_lflow_hash(const struct ovn_lflow
*lflow
)
889 size_t hash
= uuid_hash(&lflow
->od
->key
);
890 hash
= hash_2words((lflow
->stage
<< 16) | lflow
->priority
, hash
);
891 hash
= hash_string(lflow
->match
, hash
);
892 return hash_string(lflow
->actions
, hash
);
896 ovn_lflow_equal(const struct ovn_lflow
*a
, const struct ovn_lflow
*b
)
898 return (a
->od
== b
->od
899 && a
->stage
== b
->stage
900 && a
->priority
== b
->priority
901 && !strcmp(a
->match
, b
->match
)
902 && !strcmp(a
->actions
, b
->actions
));
906 ovn_lflow_init(struct ovn_lflow
*lflow
, struct ovn_datapath
*od
,
907 enum ovn_stage stage
, uint16_t priority
,
908 char *match
, char *actions
)
911 lflow
->stage
= stage
;
912 lflow
->priority
= priority
;
913 lflow
->match
= match
;
914 lflow
->actions
= actions
;
917 /* Adds a row with the specified contents to the Logical_Flow table. */
919 ovn_lflow_add(struct hmap
*lflow_map
, struct ovn_datapath
*od
,
920 enum ovn_stage stage
, uint16_t priority
,
921 const char *match
, const char *actions
)
923 struct ovn_lflow
*lflow
= xmalloc(sizeof *lflow
);
924 ovn_lflow_init(lflow
, od
, stage
, priority
,
925 xstrdup(match
), xstrdup(actions
));
926 hmap_insert(lflow_map
, &lflow
->hmap_node
, ovn_lflow_hash(lflow
));
929 static struct ovn_lflow
*
930 ovn_lflow_find(struct hmap
*lflows
, struct ovn_datapath
*od
,
931 enum ovn_stage stage
, uint16_t priority
,
932 const char *match
, const char *actions
)
934 struct ovn_lflow target
;
935 ovn_lflow_init(&target
, od
, stage
, priority
,
936 CONST_CAST(char *, match
), CONST_CAST(char *, actions
));
938 struct ovn_lflow
*lflow
;
939 HMAP_FOR_EACH_WITH_HASH (lflow
, hmap_node
, ovn_lflow_hash(&target
),
941 if (ovn_lflow_equal(lflow
, &target
)) {
949 ovn_lflow_destroy(struct hmap
*lflows
, struct ovn_lflow
*lflow
)
952 hmap_remove(lflows
, &lflow
->hmap_node
);
954 free(lflow
->actions
);
959 struct ipv4_netaddr
{
964 struct ipv6_netaddr
{
965 struct in6_addr addr
;
969 struct lport_addresses
{
972 struct ipv4_netaddr
*ipv4_addrs
;
974 struct ipv6_netaddr
*ipv6_addrs
;
978 * Extracts the mac, ipv4 and ipv6 addresses from the input param 'address'
979 * which should be of the format 'MAC [IP1 IP2 ..]" where IPn should be
980 * a valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
981 * 'ipv6_addrs' fields of input param 'laddrs'.
982 * The caller has to free the 'ipv4_addrs' and 'ipv6_addrs' fields.
983 * If input param 'store_ipv6' is true only then extracted ipv6 addresses
984 * are stored in 'ipv6_addrs' fields.
985 * Return true if at least 'MAC' is found in 'address', false otherwise.
987 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
988 * 30.0.0.3/23' and 'store_ipv6' = true
989 * then returns true with laddrs->n_ipv4_addrs = 2, naddrs->n_ipv6_addrs = 1.
992 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
993 * 30.0.0.3/23' and 'store_ipv6' = false
994 * then returns true with laddrs->n_ipv4_addrs = 2, naddrs->n_ipv6_addrs = 0.
996 * Eg 3. If 'address' = '00:00:00:00:00:01 10.0.0.4 addr 30.0.0.4', then
997 * returns true with laddrs->n_ipv4_addrs = 1 and laddrs->n_ipv6_addrs = 0.
1000 extract_lport_addresses(char *address
, struct lport_addresses
*laddrs
,
1003 char *buf
= address
;
1005 char *buf_end
= buf
+ strlen(address
);
1006 if (!ovs_scan_len(buf
, &buf_index
, ETH_ADDR_SCAN_FMT
,
1007 ETH_ADDR_SCAN_ARGS(laddrs
->ea
))) {
1012 struct in6_addr ip6
;
1016 laddrs
->n_ipv4_addrs
= 0;
1017 laddrs
->n_ipv6_addrs
= 0;
1018 laddrs
->ipv4_addrs
= NULL
;
1019 laddrs
->ipv6_addrs
= NULL
;
1021 /* Loop through the buffer and extract the IPv4/IPv6 addresses
1022 * and store in the 'laddrs'. Break the loop if invalid data is found.
1025 while (buf
< buf_end
) {
1027 error
= ip_parse_cidr_len(buf
, &buf_index
, &ip4
, &plen
);
1029 laddrs
->n_ipv4_addrs
++;
1030 laddrs
->ipv4_addrs
= xrealloc(
1032 sizeof (struct ipv4_netaddr
) * laddrs
->n_ipv4_addrs
);
1033 laddrs
->ipv4_addrs
[laddrs
->n_ipv4_addrs
- 1].addr
= ip4
;
1034 laddrs
->ipv4_addrs
[laddrs
->n_ipv4_addrs
- 1].plen
= plen
;
1039 error
= ipv6_parse_cidr_len(buf
, &buf_index
, &ip6
, &plen
);
1040 if (!error
&& store_ipv6
) {
1041 laddrs
->n_ipv6_addrs
++;
1042 laddrs
->ipv6_addrs
= xrealloc(
1044 sizeof(struct ipv6_netaddr
) * laddrs
->n_ipv6_addrs
);
1045 memcpy(&laddrs
->ipv6_addrs
[laddrs
->n_ipv6_addrs
- 1].addr
, &ip6
,
1046 sizeof(struct in6_addr
));
1047 laddrs
->ipv6_addrs
[laddrs
->n_ipv6_addrs
- 1].plen
= plen
;
1051 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1052 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in address", address
);
1062 /* Appends port security constraints on L2 address field 'eth_addr_field'
1063 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
1064 * 'n_port_security' elements, is the collection of port_security constraints
1065 * from an OVN_NB Logical_Port row. */
1067 build_port_security_l2(const char *eth_addr_field
,
1068 char **port_security
, size_t n_port_security
,
1071 size_t base_len
= match
->length
;
1072 ds_put_format(match
, " && %s == {", eth_addr_field
);
1075 for (size_t i
= 0; i
< n_port_security
; i
++) {
1078 if (eth_addr_from_string(port_security
[i
], &ea
)) {
1079 ds_put_format(match
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(ea
));
1080 ds_put_char(match
, ' ');
1084 ds_chomp(match
, ' ');
1085 ds_put_cstr(match
, "}");
1088 match
->length
= base_len
;
1093 build_port_security_ipv6_nd_flow(
1094 struct ds
*match
, struct eth_addr ea
, struct ipv6_netaddr
*ipv6_addrs
,
1097 ds_put_format(match
, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT
" || "
1098 "nd.sll == "ETH_ADDR_FMT
") || ((nd.tll == "ETH_ADDR_FMT
" || "
1099 "nd.tll == "ETH_ADDR_FMT
")", ETH_ADDR_ARGS(eth_addr_zero
),
1100 ETH_ADDR_ARGS(ea
), ETH_ADDR_ARGS(eth_addr_zero
),
1102 if (!n_ipv6_addrs
) {
1103 ds_put_cstr(match
, "))");
1107 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
1108 struct in6_addr lla
;
1109 in6_generate_lla(ea
, &lla
);
1110 memset(ip6_str
, 0, sizeof(ip6_str
));
1111 ipv6_string_mapped(ip6_str
, &lla
);
1112 ds_put_format(match
, " && (nd.target == %s", ip6_str
);
1114 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
1115 memset(ip6_str
, 0, sizeof(ip6_str
));
1116 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
1117 ds_put_format(match
, " || nd.target == %s", ip6_str
);
1120 ds_put_format(match
, ")))");
1124 build_port_security_ipv6_flow(
1125 enum ovn_pipeline pipeline
, struct ds
*match
, struct eth_addr ea
,
1126 struct ipv6_netaddr
*ipv6_addrs
, int n_ipv6_addrs
)
1128 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
1130 ds_put_format(match
, " && %s == {",
1131 pipeline
== P_IN
? "ip6.src" : "ip6.dst");
1133 /* Allow link-local address. */
1134 struct in6_addr lla
;
1135 in6_generate_lla(ea
, &lla
);
1136 ipv6_string_mapped(ip6_str
, &lla
);
1137 ds_put_format(match
, "%s, ", ip6_str
);
1139 /* Allow ip6.src=:: and ip6.dst=ff00::/8 for ND packets */
1140 ds_put_cstr(match
, pipeline
== P_IN
? "::" : "ff00::/8");
1141 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
1142 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
1143 ds_put_format(match
, ", %s", ip6_str
);
1145 ds_put_cstr(match
, "}");
1149 * Build port security constraints on ARP and IPv6 ND fields
1150 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1152 * For each port security of the logical port, following
1153 * logical flows are added
1154 * - If the port security has no IP (both IPv4 and IPv6) or
1155 * if it has IPv4 address(es)
1156 * - Priority 90 flow to allow ARP packets for known MAC addresses
1157 * in the eth.src and arp.spa fields. If the port security
1158 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
1160 * - If the port security has no IP (both IPv4 and IPv6) or
1161 * if it has IPv6 address(es)
1162 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1163 * in the eth.src and nd.sll/nd.tll fields. If the port security
1164 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1165 * for IPv6 Neighbor Advertisement packet.
1167 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1170 build_port_security_nd(struct ovn_port
*op
, struct hmap
*lflows
)
1172 for (size_t i
= 0; i
< op
->nbs
->n_port_security
; i
++) {
1173 struct lport_addresses ps
;
1174 if (!extract_lport_addresses(op
->nbs
->port_security
[i
], &ps
, true)) {
1175 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1176 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in port security. No MAC"
1177 " address found", op
->nbs
->port_security
[i
]);
1181 bool no_ip
= !(ps
.n_ipv4_addrs
|| ps
.n_ipv6_addrs
);
1182 struct ds match
= DS_EMPTY_INITIALIZER
;
1184 if (ps
.n_ipv4_addrs
|| no_ip
) {
1186 &match
, "inport == %s && eth.src == "ETH_ADDR_FMT
" && arp.sha == "
1187 ETH_ADDR_FMT
, op
->json_key
, ETH_ADDR_ARGS(ps
.ea
),
1188 ETH_ADDR_ARGS(ps
.ea
));
1190 if (ps
.n_ipv4_addrs
) {
1191 ds_put_cstr(&match
, " && (");
1192 for (size_t i
= 0; i
< ps
.n_ipv4_addrs
; i
++) {
1193 ds_put_cstr(&match
, "arp.spa == ");
1194 ovs_be32 mask
= be32_prefix_mask(ps
.ipv4_addrs
[i
].plen
);
1195 /* When the netmask is applied, if the host portion is
1196 * non-zero, the host can only use the specified
1197 * address in the arp.spa. If zero, the host is allowed
1198 * to use any address in the subnet. */
1199 if (ps
.ipv4_addrs
[i
].addr
& ~mask
) {
1200 ds_put_format(&match
, IP_FMT
,
1201 IP_ARGS(ps
.ipv4_addrs
[i
].addr
));
1203 ip_format_masked(ps
.ipv4_addrs
[i
].addr
& mask
, mask
,
1206 ds_put_cstr(&match
, " || ");
1208 ds_chomp(&match
, ' ');
1209 ds_chomp(&match
, '|');
1210 ds_chomp(&match
, '|');
1211 ds_put_cstr(&match
, ")");
1213 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
1214 ds_cstr(&match
), "next;");
1218 if (ps
.n_ipv6_addrs
|| no_ip
) {
1220 ds_put_format(&match
, "inport == %s && eth.src == "ETH_ADDR_FMT
,
1221 op
->json_key
, ETH_ADDR_ARGS(ps
.ea
));
1222 build_port_security_ipv6_nd_flow(&match
, ps
.ea
, ps
.ipv6_addrs
,
1224 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
1225 ds_cstr(&match
), "next;");
1228 free(ps
.ipv4_addrs
);
1229 free(ps
.ipv6_addrs
);
1232 char *match
= xasprintf("inport == %s && (arp || nd)", op
->json_key
);
1233 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 80,
1239 * Build port security constraints on IPv4 and IPv6 src and dst fields
1240 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1242 * For each port security of the logical port, following
1243 * logical flows are added
1244 * - If the port security has IPv4 addresses,
1245 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1247 * - If the port security has IPv6 addresses,
1248 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1250 * - If the port security has IPv4 addresses or IPv6 addresses or both
1251 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
1254 build_port_security_ip(enum ovn_pipeline pipeline
, struct ovn_port
*op
,
1255 struct hmap
*lflows
)
1257 char *port_direction
;
1258 enum ovn_stage stage
;
1259 if (pipeline
== P_IN
) {
1260 port_direction
= "inport";
1261 stage
= S_SWITCH_IN_PORT_SEC_IP
;
1263 port_direction
= "outport";
1264 stage
= S_SWITCH_OUT_PORT_SEC_IP
;
1267 for (size_t i
= 0; i
< op
->nbs
->n_port_security
; i
++) {
1268 struct lport_addresses ps
;
1269 if (!extract_lport_addresses(op
->nbs
->port_security
[i
], &ps
, true)) {
1273 if (!(ps
.n_ipv4_addrs
|| ps
.n_ipv6_addrs
)) {
1277 if (ps
.n_ipv4_addrs
) {
1278 struct ds match
= DS_EMPTY_INITIALIZER
;
1279 if (pipeline
== P_IN
) {
1280 ds_put_format(&match
, "inport == %s && eth.src == "ETH_ADDR_FMT
1281 " && ip4.src == {0.0.0.0, ", op
->json_key
,
1282 ETH_ADDR_ARGS(ps
.ea
));
1284 ds_put_format(&match
, "outport == %s && eth.dst == "ETH_ADDR_FMT
1285 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
1286 op
->json_key
, ETH_ADDR_ARGS(ps
.ea
));
1289 for (int i
= 0; i
< ps
.n_ipv4_addrs
; i
++) {
1290 ovs_be32 mask
= be32_prefix_mask(ps
.ipv4_addrs
[i
].plen
);
1291 /* When the netmask is applied, if the host portion is
1292 * non-zero, the host can only use the specified
1293 * address. If zero, the host is allowed to use any
1294 * address in the subnet.
1296 if (ps
.ipv4_addrs
[i
].addr
& ~mask
) {
1297 ds_put_format(&match
, IP_FMT
,
1298 IP_ARGS(ps
.ipv4_addrs
[i
].addr
));
1299 if (pipeline
== P_OUT
&& ps
.ipv4_addrs
[i
].plen
!= 32) {
1300 /* Host is also allowed to receive packets to the
1301 * broadcast address in the specified subnet.
1303 ds_put_format(&match
, ", "IP_FMT
,
1304 IP_ARGS(ps
.ipv4_addrs
[i
].addr
| ~mask
));
1307 /* host portion is zero */
1308 ip_format_masked(ps
.ipv4_addrs
[i
].addr
& mask
, mask
,
1311 ds_put_cstr(&match
, ", ");
1314 /* Replace ", " by "}". */
1315 ds_chomp(&match
, ' ');
1316 ds_chomp(&match
, ',');
1317 ds_put_cstr(&match
, "}");
1318 ovn_lflow_add(lflows
, op
->od
, stage
, 90, ds_cstr(&match
), "next;");
1320 free(ps
.ipv4_addrs
);
1323 if (ps
.n_ipv6_addrs
) {
1324 struct ds match
= DS_EMPTY_INITIALIZER
;
1325 ds_put_format(&match
, "%s == %s && %s == "ETH_ADDR_FMT
"",
1326 port_direction
, op
->json_key
,
1327 pipeline
== P_IN
? "eth.src" : "eth.dst",
1328 ETH_ADDR_ARGS(ps
.ea
));
1329 build_port_security_ipv6_flow(pipeline
, &match
, ps
.ea
,
1330 ps
.ipv6_addrs
, ps
.n_ipv6_addrs
);
1331 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
1332 ds_cstr(&match
), "next;");
1334 free(ps
.ipv6_addrs
);
1337 char *match
= xasprintf(
1338 "%s == %s && %s == "ETH_ADDR_FMT
" && ip", port_direction
,
1339 op
->json_key
, pipeline
== P_IN
? "eth.src" : "eth.dst",
1340 ETH_ADDR_ARGS(ps
.ea
));
1341 ovn_lflow_add(lflows
, op
->od
, stage
, 80, match
, "drop;");
1347 lport_is_enabled(const struct nbrec_logical_port
*lport
)
1349 return !lport
->enabled
|| *lport
->enabled
;
1353 lport_is_up(const struct nbrec_logical_port
*lport
)
1355 return !lport
->up
|| *lport
->up
;
1359 has_stateful_acl(struct ovn_datapath
*od
)
1361 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
1362 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
1363 if (!strcmp(acl
->action
, "allow-related")) {
1372 build_acls(struct ovn_datapath
*od
, struct hmap
*lflows
, struct hmap
*ports
)
1374 bool has_stateful
= has_stateful_acl(od
);
1375 struct ovn_port
*op
;
1377 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1378 * allowed by default. */
1379 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 0, "1", "next;");
1380 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 0, "1", "next;");
1382 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1383 * default. A related rule at priority 1 is added below if there
1384 * are any stateful ACLs in this datapath. */
1385 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 0, "1", "next;");
1386 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 0, "1", "next;");
1388 /* If there are any stateful ACL rules in this dapapath, we must
1389 * send all IP packets through the conntrack action, which handles
1390 * defragmentation, in order to match L4 headers. */
1392 HMAP_FOR_EACH (op
, key_node
, ports
) {
1393 if (op
->od
== od
&& !strcmp(op
->nbs
->type
, "router")) {
1394 /* Can't use ct() for router ports. Consider the
1395 * following configuration: lp1(10.0.0.2) on
1396 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
1397 * ping from lp1 to lp2, First, the response will go
1398 * through ct() with a zone for lp2 in the ls2 ingress
1399 * pipeline on hostB. That ct zone knows about this
1400 * connection. Next, it goes through ct() with the zone
1401 * for the router port in the egress pipeline of ls2 on
1402 * hostB. This zone does not know about the connection,
1403 * as the icmp request went through the logical router
1404 * on hostA, not hostB. This would only work with
1405 * distributed conntrack state across all chassis. */
1406 struct ds match_in
= DS_EMPTY_INITIALIZER
;
1407 struct ds match_out
= DS_EMPTY_INITIALIZER
;
1409 ds_put_format(&match_in
, "ip && inport == %s", op
->json_key
);
1410 ds_put_format(&match_out
, "ip && outport == %s", op
->json_key
);
1411 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110,
1412 ds_cstr(&match_in
), "next;");
1413 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
1414 ds_cstr(&match_out
), "next;");
1416 ds_destroy(&match_in
);
1417 ds_destroy(&match_out
);
1421 /* Ingress and Egress Pre-ACL Table (Priority 100).
1423 * Regardless of whether the ACL is "from-lport" or "to-lport",
1424 * we need rules in both the ingress and egress table, because
1425 * the return traffic needs to be followed. */
1426 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 100, "ip", "ct_next;");
1427 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 100, "ip", "ct_next;");
1429 /* Ingress and Egress ACL Table (Priority 1).
1431 * By default, traffic is allowed. This is partially handled by
1432 * the Priority 0 ACL flows added earlier, but we also need to
1433 * commit IP flows. This is because, while the initiater's
1434 * direction may not have any stateful rules, the server's may
1435 * and then its return traffic would not have an associated
1436 * conntrack entry and would return "+invalid". */
1437 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 1, "ip",
1438 "ct_commit; next;");
1439 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 1, "ip",
1440 "ct_commit; next;");
1442 /* Ingress and Egress ACL Table (Priority 65535).
1444 * Always drop traffic that's in an invalid state. This is
1445 * enforced at a higher priority than ACLs can be defined. */
1446 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
1448 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
1451 /* Ingress and Egress ACL Table (Priority 65535).
1453 * Always allow traffic that is established to a committed
1454 * conntrack entry. This is enforced at a higher priority than
1455 * ACLs can be defined. */
1456 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
1457 "ct.est && !ct.rel && !ct.new && !ct.inv",
1459 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
1460 "ct.est && !ct.rel && !ct.new && !ct.inv",
1463 /* Ingress and Egress ACL Table (Priority 65535).
1465 * Always allow traffic that is related to an existing conntrack
1466 * entry. This is enforced at a higher priority than ACLs can
1469 * NOTE: This does not support related data sessions (eg,
1470 * a dynamically negotiated FTP data channel), but will allow
1471 * related traffic such as an ICMP Port Unreachable through
1472 * that's generated from a non-listening UDP port. */
1473 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
1474 "!ct.est && ct.rel && !ct.new && !ct.inv",
1476 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
1477 "!ct.est && ct.rel && !ct.new && !ct.inv",
1481 /* Ingress or Egress ACL Table (Various priorities). */
1482 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
1483 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
1484 bool ingress
= !strcmp(acl
->direction
, "from-lport") ? true :false;
1485 enum ovn_stage stage
= ingress
? S_SWITCH_IN_ACL
: S_SWITCH_OUT_ACL
;
1487 if (!strcmp(acl
->action
, "allow")) {
1488 /* If there are any stateful flows, we must even commit "allow"
1489 * actions. This is because, while the initiater's
1490 * direction may not have any stateful rules, the server's
1491 * may and then its return traffic would not have an
1492 * associated conntrack entry and would return "+invalid". */
1493 const char *actions
= has_stateful
? "ct_commit; next;" : "next;";
1494 ovn_lflow_add(lflows
, od
, stage
,
1495 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1496 acl
->match
, actions
);
1497 } else if (!strcmp(acl
->action
, "allow-related")) {
1498 struct ds match
= DS_EMPTY_INITIALIZER
;
1500 /* Commit the connection tracking entry, which allows all
1501 * other traffic related to this entry to flow due to the
1502 * 65535 priority flow defined earlier. */
1503 ds_put_format(&match
, "ct.new && (%s)", acl
->match
);
1504 ovn_lflow_add(lflows
, od
, stage
,
1505 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1506 ds_cstr(&match
), "ct_commit; next;");
1509 } else if (!strcmp(acl
->action
, "drop")) {
1510 ovn_lflow_add(lflows
, od
, stage
,
1511 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1512 acl
->match
, "drop;");
1513 } else if (!strcmp(acl
->action
, "reject")) {
1514 /* xxx Need to support "reject". */
1515 VLOG_INFO("reject is not a supported action");
1516 ovn_lflow_add(lflows
, od
, stage
,
1517 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1518 acl
->match
, "drop;");
1524 build_lswitch_flows(struct hmap
*datapaths
, struct hmap
*ports
,
1525 struct hmap
*lflows
, struct hmap
*mcgroups
)
1527 /* This flow table structure is documented in ovn-northd(8), so please
1528 * update ovn-northd.8.xml if you change anything. */
1530 /* Build pre-ACL and ACL tables for both ingress and egress.
1531 * Ingress tables 3 and 4. Egress tables 0 and 1. */
1532 struct ovn_datapath
*od
;
1533 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1538 build_acls(od
, lflows
, ports
);
1541 /* Logical switch ingress table 0: Admission control framework (priority
1543 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1548 /* Logical VLANs not supported. */
1549 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "vlan.present",
1552 /* Broadcast/multicast source address is invalid. */
1553 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "eth.src[40]",
1556 /* Port security flows have priority 50 (see below) and will continue
1557 * to the next table if packet source is acceptable. */
1560 /* Logical switch ingress table 0: Ingress port security - L2
1562 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
1563 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
1565 struct ovn_port
*op
;
1566 HMAP_FOR_EACH (op
, key_node
, ports
) {
1571 if (!lport_is_enabled(op
->nbs
)) {
1572 /* Drop packets from disabled logical ports (since logical flow
1573 * tables are default-drop). */
1577 struct ds match
= DS_EMPTY_INITIALIZER
;
1578 ds_put_format(&match
, "inport == %s", op
->json_key
);
1579 build_port_security_l2(
1580 "eth.src", op
->nbs
->port_security
, op
->nbs
->n_port_security
,
1582 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_L2
, 50,
1583 ds_cstr(&match
), "next;");
1586 if (op
->nbs
->n_port_security
) {
1587 build_port_security_ip(P_IN
, op
, lflows
);
1588 build_port_security_nd(op
, lflows
);
1592 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
1594 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1599 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_ND
, 0, "1", "next;");
1600 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_IP
, 0, "1", "next;");
1603 /* Ingress table 3: ARP responder, skip requests coming from localnet ports.
1604 * (priority 100). */
1605 HMAP_FOR_EACH (op
, key_node
, ports
) {
1610 if (!strcmp(op
->nbs
->type
, "localnet")) {
1611 char *match
= xasprintf("inport == %s", op
->json_key
);
1612 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_RSP
, 100,
1618 /* Ingress table 5: ARP responder, reply for known IPs.
1620 HMAP_FOR_EACH (op
, key_node
, ports
) {
1626 * Add ARP reply flows if either the
1628 * - port type is router
1630 if (!lport_is_up(op
->nbs
) && strcmp(op
->nbs
->type
, "router")) {
1634 for (size_t i
= 0; i
< op
->nbs
->n_addresses
; i
++) {
1635 struct lport_addresses laddrs
;
1636 if (!extract_lport_addresses(op
->nbs
->addresses
[i
], &laddrs
,
1640 for (size_t j
= 0; j
< laddrs
.n_ipv4_addrs
; j
++) {
1641 char *match
= xasprintf(
1642 "arp.tpa == "IP_FMT
" && arp.op == 1",
1643 IP_ARGS(laddrs
.ipv4_addrs
[j
].addr
));
1644 char *actions
= xasprintf(
1645 "eth.dst = eth.src; "
1646 "eth.src = "ETH_ADDR_FMT
"; "
1647 "arp.op = 2; /* ARP reply */ "
1648 "arp.tha = arp.sha; "
1649 "arp.sha = "ETH_ADDR_FMT
"; "
1650 "arp.tpa = arp.spa; "
1651 "arp.spa = "IP_FMT
"; "
1652 "outport = inport; "
1653 "inport = \"\"; /* Allow sending out inport. */ "
1655 ETH_ADDR_ARGS(laddrs
.ea
),
1656 ETH_ADDR_ARGS(laddrs
.ea
),
1657 IP_ARGS(laddrs
.ipv4_addrs
[j
].addr
));
1658 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_RSP
, 50,
1664 free(laddrs
.ipv4_addrs
);
1668 /* Ingress table 5: ARP responder, by default goto next.
1670 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1675 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ARP_RSP
, 0, "1", "next;");
1678 /* Ingress table 6: Destination lookup, broadcast and multicast handling
1679 * (priority 100). */
1680 HMAP_FOR_EACH (op
, key_node
, ports
) {
1685 if (lport_is_enabled(op
->nbs
)) {
1686 ovn_multicast_add(mcgroups
, &mc_flood
, op
);
1689 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1694 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 100, "eth.mcast",
1695 "outport = \""MC_FLOOD
"\"; output;");
1698 /* Ingress table 6: Destination lookup, unicast handling (priority 50), */
1699 HMAP_FOR_EACH (op
, key_node
, ports
) {
1704 for (size_t i
= 0; i
< op
->nbs
->n_addresses
; i
++) {
1705 struct eth_addr mac
;
1707 if (eth_addr_from_string(op
->nbs
->addresses
[i
], &mac
)) {
1708 struct ds match
, actions
;
1711 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
1712 ETH_ADDR_ARGS(mac
));
1715 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
1716 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
1717 ds_cstr(&match
), ds_cstr(&actions
));
1718 ds_destroy(&actions
);
1720 } else if (!strcmp(op
->nbs
->addresses
[i
], "unknown")) {
1721 if (lport_is_enabled(op
->nbs
)) {
1722 ovn_multicast_add(mcgroups
, &mc_unknown
, op
);
1723 op
->od
->has_unknown
= true;
1726 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1729 "%s: invalid syntax '%s' in addresses column",
1730 op
->nbs
->name
, op
->nbs
->addresses
[i
]);
1735 /* Ingress table 6: Destination lookup for unknown MACs (priority 0). */
1736 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1741 if (od
->has_unknown
) {
1742 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 0, "1",
1743 "outport = \""MC_UNKNOWN
"\"; output;");
1747 /* Egress table 2: Egress port security - IP (priority 0)
1748 * port security L2 - multicast/broadcast (priority
1750 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1755 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_IP
, 0, "1", "next;");
1756 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_L2
, 100, "eth.mcast",
1760 /* Egress table 2: Egress port security - IP (priorities 90 and 80)
1761 * if port security enabled.
1763 * Egress table 3: Egress port security - L2 (priorities 50 and 150).
1765 * Priority 50 rules implement port security for enabled logical port.
1767 * Priority 150 rules drop packets to disabled logical ports, so that they
1768 * don't even receive multicast or broadcast packets. */
1769 HMAP_FOR_EACH (op
, key_node
, ports
) {
1774 struct ds match
= DS_EMPTY_INITIALIZER
;
1775 ds_put_format(&match
, "outport == %s", op
->json_key
);
1776 if (lport_is_enabled(op
->nbs
)) {
1777 build_port_security_l2("eth.dst", op
->nbs
->port_security
,
1778 op
->nbs
->n_port_security
, &match
);
1779 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 50,
1780 ds_cstr(&match
), "output;");
1782 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 150,
1783 ds_cstr(&match
), "drop;");
1788 if (op
->nbs
->n_port_security
) {
1789 build_port_security_ip(P_OUT
, op
, lflows
);
1795 lrport_is_enabled(const struct nbrec_logical_router_port
*lrport
)
1797 return !lrport
->enabled
|| *lrport
->enabled
;
1801 add_route(struct hmap
*lflows
, const struct ovn_port
*op
,
1802 ovs_be32 network
, ovs_be32 mask
, ovs_be32 gateway
)
1804 char *match
= xasprintf("ip4.dst == "IP_FMT
"/"IP_FMT
,
1805 IP_ARGS(network
), IP_ARGS(mask
));
1807 struct ds actions
= DS_EMPTY_INITIALIZER
;
1808 ds_put_cstr(&actions
, "ip.ttl--; reg0 = ");
1810 ds_put_format(&actions
, IP_FMT
, IP_ARGS(gateway
));
1812 ds_put_cstr(&actions
, "ip4.dst");
1814 ds_put_format(&actions
,
1817 "eth.src = "ETH_ADDR_FMT
"; "
1820 IP_ARGS(op
->ip
), ETH_ADDR_ARGS(op
->mac
), op
->json_key
);
1822 /* The priority here is calculated to implement longest-prefix-match
1824 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_ROUTING
,
1825 count_1bits(ntohl(mask
)), match
, ds_cstr(&actions
));
1826 ds_destroy(&actions
);
1831 build_lrouter_flows(struct hmap
*datapaths
, struct hmap
*ports
,
1832 struct hmap
*lflows
)
1834 /* This flow table structure is documented in ovn-northd(8), so please
1835 * update ovn-northd.8.xml if you change anything. */
1837 /* Logical router ingress table 0: Admission control framework. */
1838 struct ovn_datapath
*od
;
1839 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1844 /* Logical VLANs not supported.
1845 * Broadcast/multicast source address is invalid. */
1846 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ADMISSION
, 100,
1847 "vlan.present || eth.src[40]", "drop;");
1850 /* Logical router ingress table 0: match (priority 50). */
1851 struct ovn_port
*op
;
1852 HMAP_FOR_EACH (op
, key_node
, ports
) {
1857 if (!lrport_is_enabled(op
->nbr
)) {
1858 /* Drop packets from disabled logical ports (since logical flow
1859 * tables are default-drop). */
1863 char *match
= xasprintf(
1864 "(eth.mcast || eth.dst == "ETH_ADDR_FMT
") && inport == %s",
1865 ETH_ADDR_ARGS(op
->mac
), op
->json_key
);
1866 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ADMISSION
, 50,
1871 /* Logical router ingress table 1: IP Input. */
1872 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1877 /* L3 admission control: drop multicast and broadcast source, localhost
1878 * source or destination, and zero network source or destination
1879 * (priority 100). */
1880 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 100,
1882 "ip4.src == 255.255.255.255 || "
1883 "ip4.src == 127.0.0.0/8 || "
1884 "ip4.dst == 127.0.0.0/8 || "
1885 "ip4.src == 0.0.0.0/8 || "
1886 "ip4.dst == 0.0.0.0/8",
1889 /* ARP reply handling. Use ARP replies to populate the logical
1890 * router's ARP table. */
1891 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 90, "arp.op == 2",
1892 "put_arp(inport, arp.spa, arp.sha);");
1894 /* Drop Ethernet local broadcast. By definition this traffic should
1895 * not be forwarded.*/
1896 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 50,
1897 "eth.bcast", "drop;");
1899 /* Drop IP multicast. */
1900 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 50,
1901 "ip4.mcast", "drop;");
1905 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1906 char *match
= xasprintf("ip4 && ip.ttl == {0, 1}");
1907 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 30, match
, "drop;");
1910 /* Pass other traffic not already handled to the next table for
1912 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 0, "1", "next;");
1915 HMAP_FOR_EACH (op
, key_node
, ports
) {
1920 /* L3 admission control: drop packets that originate from an IP address
1921 * owned by the router or a broadcast address known to the router
1922 * (priority 100). */
1923 char *match
= xasprintf("ip4.src == {"IP_FMT
", "IP_FMT
"}",
1924 IP_ARGS(op
->ip
), IP_ARGS(op
->bcast
));
1925 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 100,
1929 /* ICMP echo reply. These flows reply to ICMP echo requests
1930 * received for the router's IP address. */
1932 "inport == %s && (ip4.dst == "IP_FMT
" || ip4.dst == "IP_FMT
") && "
1933 "icmp4.type == 8 && icmp4.code == 0",
1934 op
->json_key
, IP_ARGS(op
->ip
), IP_ARGS(op
->bcast
));
1935 char *actions
= xasprintf(
1936 "ip4.dst = ip4.src; "
1937 "ip4.src = "IP_FMT
"; "
1940 "inport = \"\"; /* Allow sending out inport. */ "
1943 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
1948 /* ARP reply. These flows reply to ARP requests for the router's own
1951 "inport == %s && arp.tpa == "IP_FMT
" && arp.op == 1",
1952 op
->json_key
, IP_ARGS(op
->ip
));
1953 actions
= xasprintf(
1954 "eth.dst = eth.src; "
1955 "eth.src = "ETH_ADDR_FMT
"; "
1956 "arp.op = 2; /* ARP reply */ "
1957 "arp.tha = arp.sha; "
1958 "arp.sha = "ETH_ADDR_FMT
"; "
1959 "arp.tpa = arp.spa; "
1960 "arp.spa = "IP_FMT
"; "
1962 "inport = \"\"; /* Allow sending out inport. */ "
1964 ETH_ADDR_ARGS(op
->mac
),
1965 ETH_ADDR_ARGS(op
->mac
),
1968 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
1973 /* Drop IP traffic to this router. */
1974 match
= xasprintf("ip4.dst == "IP_FMT
, IP_ARGS(op
->ip
));
1975 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 60,
1980 /* Logical router ingress table 2: IP Routing.
1982 * A packet that arrives at this table is an IP packet that should be
1983 * routed to the address in ip4.dst. This table sets outport to the correct
1984 * output port, eth.src to the output port's MAC address, and reg0 to the
1985 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
1986 * unchanged), and advances to the next table for ARP resolution. */
1987 HMAP_FOR_EACH (op
, key_node
, ports
) {
1992 add_route(lflows
, op
, op
->network
, op
->mask
, 0);
1994 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1999 if (od
->gateway
&& od
->gateway_port
) {
2000 add_route(lflows
, od
->gateway_port
, 0, 0, od
->gateway
);
2003 /* XXX destination unreachable */
2005 /* Local router ingress table 3: ARP Resolution.
2007 * Any packet that reaches this table is an IP packet whose next-hop IP
2008 * address is in reg0. (ip4.dst is the final destination.) This table
2009 * resolves the IP address in reg0 into an output port in outport and an
2010 * Ethernet address in eth.dst. */
2011 HMAP_FOR_EACH (op
, key_node
, ports
) {
2013 /* This is a logical router port. If next-hop IP address in 'reg0'
2014 * matches ip address of this router port, then the packet is
2015 * intended to eventually be sent to this logical port. Set the
2016 * destination mac address using this port's mac address.
2018 * The packet is still in peer's logical pipeline. So the match
2019 * should be on peer's outport. */
2020 if (op
->nbr
->peer
) {
2021 struct ovn_port
*peer
= ovn_port_find(ports
, op
->nbr
->peer
);
2026 if (!peer
->ip
|| !op
->ip
) {
2029 char *match
= xasprintf("outport == %s && reg0 == "IP_FMT
,
2030 peer
->json_key
, IP_ARGS(op
->ip
));
2031 char *actions
= xasprintf("eth.dst = "ETH_ADDR_FMT
"; "
2032 "next;", ETH_ADDR_ARGS(op
->mac
));
2033 ovn_lflow_add(lflows
, peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
2034 100, match
, actions
);
2038 } else if (op
->od
->n_router_ports
) {
2039 for (size_t i
= 0; i
< op
->nbs
->n_addresses
; i
++) {
2040 struct lport_addresses laddrs
;
2041 if (!extract_lport_addresses(op
->nbs
->addresses
[i
], &laddrs
,
2046 for (size_t k
= 0; k
< laddrs
.n_ipv4_addrs
; k
++) {
2047 ovs_be32 ip
= laddrs
.ipv4_addrs
[k
].addr
;
2048 for (size_t j
= 0; j
< op
->od
->n_router_ports
; j
++) {
2049 /* Get the Logical_Router_Port that the Logical_Port is
2050 * connected to, as 'peer'. */
2051 const char *peer_name
= smap_get(
2052 &op
->od
->router_ports
[j
]->nbs
->options
,
2058 struct ovn_port
*peer
2059 = ovn_port_find(ports
, peer_name
);
2060 if (!peer
|| !peer
->nbr
) {
2064 /* Make sure that 'ip' is in 'peer''s network. */
2065 if ((ip
^ peer
->network
) & peer
->mask
) {
2069 char *match
= xasprintf(
2070 "outport == %s && reg0 == "IP_FMT
,
2071 peer
->json_key
, IP_ARGS(ip
));
2072 char *actions
= xasprintf("eth.dst = "ETH_ADDR_FMT
"; "
2074 ETH_ADDR_ARGS(laddrs
.ea
));
2075 ovn_lflow_add(lflows
, peer
->od
,
2076 S_ROUTER_IN_ARP_RESOLVE
,
2077 100, match
, actions
);
2084 free(laddrs
.ipv4_addrs
);
2088 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
2093 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 0, "1",
2094 "get_arp(outport, reg0); next;");
2097 /* Local router ingress table 4: ARP request.
2099 * In the common case where the Ethernet destination has been resolved,
2100 * this table outputs the packet (priority 100). Otherwise, it composes
2101 * and sends an ARP request (priority 0). */
2102 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
2107 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 100,
2108 "eth.dst == 00:00:00:00:00:00",
2110 "eth.dst = ff:ff:ff:ff:ff:ff; "
2112 "arp.op = 1; " /* ARP request */
2115 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 0, "1", "output;");
2118 /* Logical router egress table 0: Delivery (priority 100).
2120 * Priority 100 rules deliver packets to enabled logical ports. */
2121 HMAP_FOR_EACH (op
, key_node
, ports
) {
2126 if (!lrport_is_enabled(op
->nbr
)) {
2127 /* Drop packets to disabled logical ports (since logical flow
2128 * tables are default-drop). */
2132 char *match
= xasprintf("outport == %s", op
->json_key
);
2133 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_OUT_DELIVERY
, 100,
2139 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2140 * constructing their contents based on the OVN_NB database. */
2142 build_lflows(struct northd_context
*ctx
, struct hmap
*datapaths
,
2145 struct hmap lflows
= HMAP_INITIALIZER(&lflows
);
2146 struct hmap mcgroups
= HMAP_INITIALIZER(&mcgroups
);
2148 build_lswitch_flows(datapaths
, ports
, &lflows
, &mcgroups
);
2149 build_lrouter_flows(datapaths
, ports
, &lflows
);
2151 /* Push changes to the Logical_Flow table to database. */
2152 const struct sbrec_logical_flow
*sbflow
, *next_sbflow
;
2153 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow
, next_sbflow
, ctx
->ovnsb_idl
) {
2154 struct ovn_datapath
*od
2155 = ovn_datapath_from_sbrec(datapaths
, sbflow
->logical_datapath
);
2157 sbrec_logical_flow_delete(sbflow
);
2161 enum ovn_datapath_type dp_type
= od
->nbs
? DP_SWITCH
: DP_ROUTER
;
2162 enum ovn_pipeline pipeline
2163 = !strcmp(sbflow
->pipeline
, "ingress") ? P_IN
: P_OUT
;
2164 struct ovn_lflow
*lflow
= ovn_lflow_find(
2165 &lflows
, od
, ovn_stage_build(dp_type
, pipeline
, sbflow
->table_id
),
2166 sbflow
->priority
, sbflow
->match
, sbflow
->actions
);
2168 ovn_lflow_destroy(&lflows
, lflow
);
2170 sbrec_logical_flow_delete(sbflow
);
2173 struct ovn_lflow
*lflow
, *next_lflow
;
2174 HMAP_FOR_EACH_SAFE (lflow
, next_lflow
, hmap_node
, &lflows
) {
2175 enum ovn_pipeline pipeline
= ovn_stage_get_pipeline(lflow
->stage
);
2176 uint8_t table
= ovn_stage_get_table(lflow
->stage
);
2178 sbflow
= sbrec_logical_flow_insert(ctx
->ovnsb_txn
);
2179 sbrec_logical_flow_set_logical_datapath(sbflow
, lflow
->od
->sb
);
2180 sbrec_logical_flow_set_pipeline(
2181 sbflow
, pipeline
== P_IN
? "ingress" : "egress");
2182 sbrec_logical_flow_set_table_id(sbflow
, table
);
2183 sbrec_logical_flow_set_priority(sbflow
, lflow
->priority
);
2184 sbrec_logical_flow_set_match(sbflow
, lflow
->match
);
2185 sbrec_logical_flow_set_actions(sbflow
, lflow
->actions
);
2187 const struct smap ids
= SMAP_CONST1(&ids
, "stage-name",
2188 ovn_stage_to_str(lflow
->stage
));
2189 sbrec_logical_flow_set_external_ids(sbflow
, &ids
);
2191 ovn_lflow_destroy(&lflows
, lflow
);
2193 hmap_destroy(&lflows
);
2195 /* Push changes to the Multicast_Group table to database. */
2196 const struct sbrec_multicast_group
*sbmc
, *next_sbmc
;
2197 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc
, next_sbmc
, ctx
->ovnsb_idl
) {
2198 struct ovn_datapath
*od
= ovn_datapath_from_sbrec(datapaths
,
2201 sbrec_multicast_group_delete(sbmc
);
2205 struct multicast_group group
= { .name
= sbmc
->name
,
2206 .key
= sbmc
->tunnel_key
};
2207 struct ovn_multicast
*mc
= ovn_multicast_find(&mcgroups
, od
, &group
);
2209 ovn_multicast_update_sbrec(mc
, sbmc
);
2210 ovn_multicast_destroy(&mcgroups
, mc
);
2212 sbrec_multicast_group_delete(sbmc
);
2215 struct ovn_multicast
*mc
, *next_mc
;
2216 HMAP_FOR_EACH_SAFE (mc
, next_mc
, hmap_node
, &mcgroups
) {
2217 sbmc
= sbrec_multicast_group_insert(ctx
->ovnsb_txn
);
2218 sbrec_multicast_group_set_datapath(sbmc
, mc
->datapath
->sb
);
2219 sbrec_multicast_group_set_name(sbmc
, mc
->group
->name
);
2220 sbrec_multicast_group_set_tunnel_key(sbmc
, mc
->group
->key
);
2221 ovn_multicast_update_sbrec(mc
, sbmc
);
2222 ovn_multicast_destroy(&mcgroups
, mc
);
2224 hmap_destroy(&mcgroups
);
2228 ovnnb_db_run(struct northd_context
*ctx
)
2230 if (!ctx
->ovnsb_txn
) {
2233 struct hmap datapaths
, ports
;
2234 build_datapaths(ctx
, &datapaths
);
2235 build_ports(ctx
, &datapaths
, &ports
);
2236 build_lflows(ctx
, &datapaths
, &ports
);
2238 struct ovn_datapath
*dp
, *next_dp
;
2239 HMAP_FOR_EACH_SAFE (dp
, next_dp
, key_node
, &datapaths
) {
2240 ovn_datapath_destroy(&datapaths
, dp
);
2242 hmap_destroy(&datapaths
);
2244 struct ovn_port
*port
, *next_port
;
2245 HMAP_FOR_EACH_SAFE (port
, next_port
, key_node
, &ports
) {
2246 ovn_port_destroy(&ports
, port
);
2248 hmap_destroy(&ports
);
/*
 * The only change we get notified about is if the 'chassis' column of the
 * 'Port_Binding' table changes.  When this column is not empty, it means we
 * need to set the corresponding logical port as 'up' in the northbound DB.
 */
static void
ovnsb_db_run(struct northd_context *ctx)
{
    /* This function only writes to the northbound DB, so a northbound
     * transaction must be open. */
    if (!ctx->ovnnb_txn) {
        return;
    }
    struct hmap lports_hmap;
    const struct sbrec_port_binding *sb;
    const struct nbrec_logical_port *nb;

    /* Temporary index of northbound logical ports hashed by name, so the
     * per-Port_Binding lookup below is a hash lookup rather than a scan. */
    struct lport_hash_node {
        struct hmap_node node;
        const struct nbrec_logical_port *nb;
    } *hash_node, *hash_node_next;

    hmap_init(&lports_hmap);

    NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
        hash_node = xzalloc(sizeof *hash_node);
        hash_node->nb = nb;
        hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
    }

    SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
        /* Find the northbound logical port with the same name as this
         * binding's logical_port, if any. */
        nb = NULL;
        HMAP_FOR_EACH_WITH_HASH(hash_node, node,
                                hash_string(sb->logical_port, 0),
                                &lports_hmap) {
            if (!strcmp(sb->logical_port, hash_node->nb->name)) {
                nb = hash_node->nb;
                break;
            }
        }

        if (!nb) {
            /* The logical port doesn't exist for this port binding.  This can
             * happen under normal circumstances when ovn-northd hasn't gotten
             * around to pruning the Port_Binding yet. */
            continue;
        }

        /* Mirror the binding's chassis status into the northbound 'up'
         * column, but only when the stored value would actually change
         * (an unset 'up' column counts as needing a write). */
        if (sb->chassis && (!nb->up || !*nb->up)) {
            bool up = true;
            nbrec_logical_port_set_up(nb, &up, 1);
        } else if (!sb->chassis && (!nb->up || *nb->up)) {
            bool up = false;
            nbrec_logical_port_set_up(nb, &up, 1);
        }
    }

    /* Free the temporary index. */
    HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
        hmap_remove(&lports_hmap, &hash_node->node);
        free(hash_node);
    }
    hmap_destroy(&lports_hmap);
}
2314 static char *default_nb_db_
;
2319 if (!default_nb_db_
) {
2320 default_nb_db_
= xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
2322 return default_nb_db_
;
2325 static char *default_sb_db_
;
2330 if (!default_sb_db_
) {
2331 default_sb_db_
= xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
2333 return default_sb_db_
;
/* Parses ovn-northd's command-line options.
 *
 * Recognizes --ovnsb-db/-d and --ovnnb-db/-D (stored in the file-scope
 * 'ovnsb_db' and 'ovnnb_db' variables) plus the standard daemon, vlog, and
 * stream-SSL option sets.  When no database was given on the command line,
 * falls back to the default unix-socket paths from default_sb_db() /
 * default_nb_db().  Exits the process for --help, --options, and
 * --version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    /* Derived from long_options so the two tables cannot drift apart. */
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Expand to 'case' labels for the shared option sets. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the default database sockets when none were given. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
/* Registers 'column' for replication in 'idl' but suppresses change alerts
 * for it.  ovn-northd itself is the writer of these columns (see main()), so
 * waking up on its own updates would only cause needless recomputation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    /* "ovs-appctl -t ovn-northd exit" flips 'exiting' via ovn_northd_exit. */
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    nbrec_init();
    sbrec_init();

    /* We want to detect all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));

    /* For the southbound DB, monitor only the tables/columns registered
     * below ('monitor_everything_by_default' is false). */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    /* ovn-northd writes all of the following columns itself, so they are
     * registered with add_column_noalert() to avoid self-triggered wakeups.
     * Only Port_Binding's 'chassis' column (written by ovn-controller) is
     * registered with alerts enabled. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* Each iteration opens fresh transactions on both databases. */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx);
        ovnsb_db_run(&ctx);

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            /* Don't sleep in poll_block() below; exit promptly. */
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    /* Release the lazily-built default DB paths (see default_nb_db()). */
    free(default_nb_db_);
    free(default_sb_db_);
    exit(res);
}
/* unixctl "exit" command handler: sets the flag that makes main()'s loop
 * terminate, then acknowledges the command. */
static void
ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
                const char *argv[] OVS_UNUSED, void *exiting_)
{
    bool *exiting = exiting_;
    *exiting = true;

    unixctl_command_reply(conn, NULL);
}