2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
41 VLOG_DEFINE_THIS_MODULE(ovn_northd
);
43 static unixctl_cb_func ovn_northd_exit
;
45 struct northd_context
{
46 struct ovsdb_idl
*ovnnb_idl
;
47 struct ovsdb_idl
*ovnsb_idl
;
48 struct ovsdb_idl_txn
*ovnnb_txn
;
49 struct ovsdb_idl_txn
*ovnsb_txn
;
52 static const char *ovnnb_db
;
53 static const char *ovnsb_db
;
55 static const char *default_db(void);
57 /* Pipeline stages. */
59 /* The two pipelines in an OVN logical flow table. */
61 P_IN
, /* Ingress pipeline. */
62 P_OUT
/* Egress pipeline. */
65 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
66 enum ovn_datapath_type
{
67 DP_SWITCH
, /* OVN logical switch. */
68 DP_ROUTER
/* OVN logical router. */
71 /* Returns an "enum ovn_stage" built from the arguments.
73 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
74 * functions can't be used in enums or switch cases.) */
75 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
76 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
78 /* A stage within an OVN logical switch or router.
80 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81 * or router, whether the stage is part of the ingress or egress pipeline, and
82 * the table within that pipeline. The first three components are combined to
83 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
84 * S_ROUTER_OUT_DELIVERY. */
86 #define PIPELINE_STAGES \
87 /* Logical switch ingress stages. */ \
88 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
89 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
90 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
91 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
92 PIPELINE_STAGE(SWITCH, IN, ACL, 4, "ls_in_acl") \
93 PIPELINE_STAGE(SWITCH, IN, ARP_RSP, 5, "ls_in_arp_rsp") \
94 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 6, "ls_in_l2_lkup") \
96 /* Logical switch egress stages. */ \
97 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
98 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
99 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 2, "ls_out_port_sec_ip") \
100 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 3, "ls_out_port_sec_l2") \
102 /* Logical router ingress stages. */ \
103 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
104 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
105 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
106 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 3, "lr_in_arp_resolve") \
107 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 4, "lr_in_arp_request") \
109 /* Logical router egress stages. */ \
110 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")
112 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
113 S_##DP_TYPE##_##PIPELINE##_##STAGE \
114 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
116 #undef PIPELINE_STAGE
119 /* Due to various hard-coded priorities needed to implement ACLs, the
120 * northbound database supports a smaller range of ACL priorities than
121 * are available to logical flows. This value is added to an ACL
122 * priority to determine the ACL's logical flow priority. */
123 #define OVN_ACL_PRI_OFFSET 1000
125 /* Returns an "enum ovn_stage" built from the arguments. */
126 static enum ovn_stage
127 ovn_stage_build(enum ovn_datapath_type dp_type
, enum ovn_pipeline pipeline
,
130 return OVN_STAGE_BUILD(dp_type
, pipeline
, table
);
133 /* Returns the pipeline to which 'stage' belongs. */
134 static enum ovn_pipeline
135 ovn_stage_get_pipeline(enum ovn_stage stage
)
137 return (stage
>> 8) & 1;
140 /* Returns the table to which 'stage' belongs. */
142 ovn_stage_get_table(enum ovn_stage stage
)
147 /* Returns a string name for 'stage'. */
149 ovn_stage_to_str(enum ovn_stage stage
)
152 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
153 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
155 #undef PIPELINE_STAGE
156 default: return "<unknown>";
164 %s: OVN northbound management daemon\n\
165 usage: %s [OPTIONS]\n\
168 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
170 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
172 -h, --help display this help message\n\
173 -o, --options list available options\n\
174 -V, --version display version information\n\
175 ", program_name
, program_name
, default_db(), default_db());
178 stream_usage("database", true, true, false);
182 struct hmap_node hmap_node
;
187 destroy_tnlids(struct hmap
*tnlids
)
189 struct tnlid_node
*node
, *next
;
190 HMAP_FOR_EACH_SAFE (node
, next
, hmap_node
, tnlids
) {
191 hmap_remove(tnlids
, &node
->hmap_node
);
194 hmap_destroy(tnlids
);
198 add_tnlid(struct hmap
*set
, uint32_t tnlid
)
200 struct tnlid_node
*node
= xmalloc(sizeof *node
);
201 hmap_insert(set
, &node
->hmap_node
, hash_int(tnlid
, 0));
206 tnlid_in_use(const struct hmap
*set
, uint32_t tnlid
)
208 const struct tnlid_node
*node
;
209 HMAP_FOR_EACH_IN_BUCKET (node
, hmap_node
, hash_int(tnlid
, 0), set
) {
210 if (node
->tnlid
== tnlid
) {
218 allocate_tnlid(struct hmap
*set
, const char *name
, uint32_t max
,
221 for (uint32_t tnlid
= *hint
+ 1; tnlid
!= *hint
;
222 tnlid
= tnlid
+ 1 <= max
? tnlid
+ 1 : 1) {
223 if (!tnlid_in_use(set
, tnlid
)) {
224 add_tnlid(set
, tnlid
);
230 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
231 VLOG_WARN_RL(&rl
, "all %s tunnel ids exhausted", name
);
235 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
236 * sb->external_ids:logical-switch. */
237 struct ovn_datapath
{
238 struct hmap_node key_node
; /* Index on 'key'. */
239 struct uuid key
; /* (nbs/nbr)->header_.uuid. */
241 const struct nbrec_logical_switch
*nbs
; /* May be NULL. */
242 const struct nbrec_logical_router
*nbr
; /* May be NULL. */
243 const struct sbrec_datapath_binding
*sb
; /* May be NULL. */
245 struct ovs_list list
; /* In list of similar records. */
247 /* Logical router data (digested from nbr). */
248 const struct ovn_port
*gateway_port
;
251 /* Logical switch data. */
252 struct ovn_port
**router_ports
;
253 size_t n_router_ports
;
255 struct hmap port_tnlids
;
256 uint32_t port_key_hint
;
261 static struct ovn_datapath
*
262 ovn_datapath_create(struct hmap
*datapaths
, const struct uuid
*key
,
263 const struct nbrec_logical_switch
*nbs
,
264 const struct nbrec_logical_router
*nbr
,
265 const struct sbrec_datapath_binding
*sb
)
267 struct ovn_datapath
*od
= xzalloc(sizeof *od
);
272 hmap_init(&od
->port_tnlids
);
273 od
->port_key_hint
= 0;
274 hmap_insert(datapaths
, &od
->key_node
, uuid_hash(&od
->key
));
279 ovn_datapath_destroy(struct hmap
*datapaths
, struct ovn_datapath
*od
)
282 /* Don't remove od->list. It is used within build_datapaths() as a
283 * private list and once we've exited that function it is not safe to
285 hmap_remove(datapaths
, &od
->key_node
);
286 destroy_tnlids(&od
->port_tnlids
);
287 free(od
->router_ports
);
292 static struct ovn_datapath
*
293 ovn_datapath_find(struct hmap
*datapaths
, const struct uuid
*uuid
)
295 struct ovn_datapath
*od
;
297 HMAP_FOR_EACH_WITH_HASH (od
, key_node
, uuid_hash(uuid
), datapaths
) {
298 if (uuid_equals(uuid
, &od
->key
)) {
305 static struct ovn_datapath
*
306 ovn_datapath_from_sbrec(struct hmap
*datapaths
,
307 const struct sbrec_datapath_binding
*sb
)
311 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
312 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
315 return ovn_datapath_find(datapaths
, &key
);
319 join_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
,
320 struct ovs_list
*sb_only
, struct ovs_list
*nb_only
,
321 struct ovs_list
*both
)
323 hmap_init(datapaths
);
328 const struct sbrec_datapath_binding
*sb
, *sb_next
;
329 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb
, sb_next
, ctx
->ovnsb_idl
) {
331 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
332 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
333 ovsdb_idl_txn_add_comment(
335 "deleting Datapath_Binding "UUID_FMT
" that lacks "
336 "external-ids:logical-switch and "
337 "external-ids:logical-router",
338 UUID_ARGS(&sb
->header_
.uuid
));
339 sbrec_datapath_binding_delete(sb
);
343 if (ovn_datapath_find(datapaths
, &key
)) {
344 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
346 &rl
, "deleting Datapath_Binding "UUID_FMT
" with "
347 "duplicate external-ids:logical-switch/router "UUID_FMT
,
348 UUID_ARGS(&sb
->header_
.uuid
), UUID_ARGS(&key
));
349 sbrec_datapath_binding_delete(sb
);
353 struct ovn_datapath
*od
= ovn_datapath_create(datapaths
, &key
,
355 list_push_back(sb_only
, &od
->list
);
358 const struct nbrec_logical_switch
*nbs
;
359 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs
, ctx
->ovnnb_idl
) {
360 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
364 list_remove(&od
->list
);
365 list_push_back(both
, &od
->list
);
367 od
= ovn_datapath_create(datapaths
, &nbs
->header_
.uuid
,
369 list_push_back(nb_only
, &od
->list
);
373 const struct nbrec_logical_router
*nbr
;
374 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr
, ctx
->ovnnb_idl
) {
375 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
380 list_remove(&od
->list
);
381 list_push_back(both
, &od
->list
);
384 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
386 "duplicate UUID "UUID_FMT
" in OVN_Northbound",
387 UUID_ARGS(&nbr
->header_
.uuid
));
391 od
= ovn_datapath_create(datapaths
, &nbr
->header_
.uuid
,
393 list_push_back(nb_only
, &od
->list
);
397 if (nbr
->default_gw
) {
399 if (!ip_parse(nbr
->default_gw
, &ip
) || !ip
) {
400 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
401 VLOG_WARN_RL(&rl
, "bad 'gateway' %s", nbr
->default_gw
);
407 /* Set the gateway port to NULL. If there is a gateway, it will get
408 * filled in as we go through the ports later. */
409 od
->gateway_port
= NULL
;
414 ovn_datapath_allocate_key(struct hmap
*dp_tnlids
)
416 static uint32_t hint
;
417 return allocate_tnlid(dp_tnlids
, "datapath", (1u << 24) - 1, &hint
);
420 /* Updates the southbound Datapath_Binding table so that it contains the
421 * logical switches and routers specified by the northbound database.
423 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
424 * switch and router. */
426 build_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
)
428 struct ovs_list sb_only
, nb_only
, both
;
430 join_datapaths(ctx
, datapaths
, &sb_only
, &nb_only
, &both
);
432 if (!list_is_empty(&nb_only
)) {
433 /* First index the in-use datapath tunnel IDs. */
434 struct hmap dp_tnlids
= HMAP_INITIALIZER(&dp_tnlids
);
435 struct ovn_datapath
*od
;
436 LIST_FOR_EACH (od
, list
, &both
) {
437 add_tnlid(&dp_tnlids
, od
->sb
->tunnel_key
);
440 /* Add southbound record for each unmatched northbound record. */
441 LIST_FOR_EACH (od
, list
, &nb_only
) {
442 uint16_t tunnel_key
= ovn_datapath_allocate_key(&dp_tnlids
);
447 od
->sb
= sbrec_datapath_binding_insert(ctx
->ovnsb_txn
);
449 char uuid_s
[UUID_LEN
+ 1];
450 sprintf(uuid_s
, UUID_FMT
, UUID_ARGS(&od
->key
));
451 const char *key
= od
->nbs
? "logical-switch" : "logical-router";
452 const struct smap id
= SMAP_CONST1(&id
, key
, uuid_s
);
453 sbrec_datapath_binding_set_external_ids(od
->sb
, &id
);
455 sbrec_datapath_binding_set_tunnel_key(od
->sb
, tunnel_key
);
457 destroy_tnlids(&dp_tnlids
);
460 /* Delete southbound records without northbound matches. */
461 struct ovn_datapath
*od
, *next
;
462 LIST_FOR_EACH_SAFE (od
, next
, list
, &sb_only
) {
463 list_remove(&od
->list
);
464 sbrec_datapath_binding_delete(od
->sb
);
465 ovn_datapath_destroy(datapaths
, od
);
470 struct hmap_node key_node
; /* Index on 'key'. */
471 char *key
; /* nbs->name, nbr->name, sb->logical_port. */
472 char *json_key
; /* 'key', quoted for use in JSON. */
474 const struct nbrec_logical_port
*nbs
; /* May be NULL. */
475 const struct nbrec_logical_router_port
*nbr
; /* May be NULL. */
476 const struct sbrec_port_binding
*sb
; /* May be NULL. */
478 /* Logical router port data. */
479 ovs_be32 ip
, mask
; /* 192.168.10.123/24. */
480 ovs_be32 network
; /* 192.168.10.0. */
481 ovs_be32 bcast
; /* 192.168.10.255. */
483 struct ovn_port
*peer
;
485 struct ovn_datapath
*od
;
487 struct ovs_list list
; /* In list of similar records. */
490 static struct ovn_port
*
491 ovn_port_create(struct hmap
*ports
, const char *key
,
492 const struct nbrec_logical_port
*nbs
,
493 const struct nbrec_logical_router_port
*nbr
,
494 const struct sbrec_port_binding
*sb
)
496 struct ovn_port
*op
= xzalloc(sizeof *op
);
498 struct ds json_key
= DS_EMPTY_INITIALIZER
;
499 json_string_escape(key
, &json_key
);
500 op
->json_key
= ds_steal_cstr(&json_key
);
502 op
->key
= xstrdup(key
);
506 hmap_insert(ports
, &op
->key_node
, hash_string(op
->key
, 0));
511 ovn_port_destroy(struct hmap
*ports
, struct ovn_port
*port
)
514 /* Don't remove port->list. It is used within build_ports() as a
515 * private list and once we've exited that function it is not safe to
517 hmap_remove(ports
, &port
->key_node
);
518 free(port
->json_key
);
524 static struct ovn_port
*
525 ovn_port_find(struct hmap
*ports
, const char *name
)
529 HMAP_FOR_EACH_WITH_HASH (op
, key_node
, hash_string(name
, 0), ports
) {
530 if (!strcmp(op
->key
, name
)) {
538 ovn_port_allocate_key(struct ovn_datapath
*od
)
540 return allocate_tnlid(&od
->port_tnlids
, "port",
541 (1u << 15) - 1, &od
->port_key_hint
);
545 join_logical_ports(struct northd_context
*ctx
,
546 struct hmap
*datapaths
, struct hmap
*ports
,
547 struct ovs_list
*sb_only
, struct ovs_list
*nb_only
,
548 struct ovs_list
*both
)
555 const struct sbrec_port_binding
*sb
;
556 SBREC_PORT_BINDING_FOR_EACH (sb
, ctx
->ovnsb_idl
) {
557 struct ovn_port
*op
= ovn_port_create(ports
, sb
->logical_port
,
559 list_push_back(sb_only
, &op
->list
);
562 struct ovn_datapath
*od
;
563 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
565 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
566 const struct nbrec_logical_port
*nbs
= od
->nbs
->ports
[i
];
567 struct ovn_port
*op
= ovn_port_find(ports
, nbs
->name
);
569 if (op
->nbs
|| op
->nbr
) {
570 static struct vlog_rate_limit rl
571 = VLOG_RATE_LIMIT_INIT(5, 1);
572 VLOG_WARN_RL(&rl
, "duplicate logical port %s",
577 list_remove(&op
->list
);
578 list_push_back(both
, &op
->list
);
580 op
= ovn_port_create(ports
, nbs
->name
, nbs
, NULL
, NULL
);
581 list_push_back(nb_only
, &op
->list
);
587 for (size_t i
= 0; i
< od
->nbr
->n_ports
; i
++) {
588 const struct nbrec_logical_router_port
*nbr
592 if (!eth_addr_from_string(nbr
->mac
, &mac
)) {
593 static struct vlog_rate_limit rl
594 = VLOG_RATE_LIMIT_INIT(5, 1);
595 VLOG_WARN_RL(&rl
, "bad 'mac' %s", nbr
->mac
);
600 char *error
= ip_parse_masked(nbr
->network
, &ip
, &mask
);
601 if (error
|| mask
== OVS_BE32_MAX
|| !ip_is_cidr(mask
)) {
602 static struct vlog_rate_limit rl
603 = VLOG_RATE_LIMIT_INIT(5, 1);
604 VLOG_WARN_RL(&rl
, "bad 'network' %s", nbr
->network
);
609 struct ovn_port
*op
= ovn_port_find(ports
, nbr
->name
);
611 if (op
->nbs
|| op
->nbr
) {
612 static struct vlog_rate_limit rl
613 = VLOG_RATE_LIMIT_INIT(5, 1);
614 VLOG_WARN_RL(&rl
, "duplicate logical router port %s",
619 list_remove(&op
->list
);
620 list_push_back(both
, &op
->list
);
622 op
= ovn_port_create(ports
, nbr
->name
, NULL
, nbr
, NULL
);
623 list_push_back(nb_only
, &op
->list
);
628 op
->network
= ip
& mask
;
629 op
->bcast
= ip
| ~mask
;
634 /* If 'od' has a gateway and 'op' routes to it... */
635 if (od
->gateway
&& !((op
->network
^ od
->gateway
) & op
->mask
)) {
636 /* ...and if 'op' is a longer match than the current
638 const struct ovn_port
*gw
= od
->gateway_port
;
639 int len
= gw
? ip_count_cidr_bits(gw
->mask
) : 0;
640 if (ip_count_cidr_bits(op
->mask
) > len
) {
641 /* ...then it's the default gateway port. */
642 od
->gateway_port
= op
;
649 /* Connect logical router ports, and logical switch ports of type "router",
652 HMAP_FOR_EACH (op
, key_node
, ports
) {
653 if (op
->nbs
&& !strcmp(op
->nbs
->type
, "router")) {
654 const char *peer_name
= smap_get(&op
->nbs
->options
, "router-port");
659 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
660 if (!peer
|| !peer
->nbr
) {
666 op
->od
->router_ports
= xrealloc(
667 op
->od
->router_ports
,
668 sizeof *op
->od
->router_ports
* (op
->od
->n_router_ports
+ 1));
669 op
->od
->router_ports
[op
->od
->n_router_ports
++] = op
;
670 } else if (op
->nbr
&& op
->nbr
->peer
) {
671 op
->peer
= ovn_port_find(ports
, op
->nbr
->name
);
677 ovn_port_update_sbrec(const struct ovn_port
*op
)
679 sbrec_port_binding_set_datapath(op
->sb
, op
->od
->sb
);
681 sbrec_port_binding_set_type(op
->sb
, "patch");
683 const char *peer
= op
->peer
? op
->peer
->key
: "<error>";
684 const struct smap ids
= SMAP_CONST1(&ids
, "peer", peer
);
685 sbrec_port_binding_set_options(op
->sb
, &ids
);
687 sbrec_port_binding_set_parent_port(op
->sb
, NULL
);
688 sbrec_port_binding_set_tag(op
->sb
, NULL
, 0);
689 sbrec_port_binding_set_mac(op
->sb
, NULL
, 0);
691 if (strcmp(op
->nbs
->type
, "router")) {
692 sbrec_port_binding_set_type(op
->sb
, op
->nbs
->type
);
693 sbrec_port_binding_set_options(op
->sb
, &op
->nbs
->options
);
695 sbrec_port_binding_set_type(op
->sb
, "patch");
697 const char *router_port
= smap_get(&op
->nbs
->options
,
700 router_port
= "<error>";
702 const struct smap ids
= SMAP_CONST1(&ids
, "peer", router_port
);
703 sbrec_port_binding_set_options(op
->sb
, &ids
);
705 sbrec_port_binding_set_parent_port(op
->sb
, op
->nbs
->parent_name
);
706 sbrec_port_binding_set_tag(op
->sb
, op
->nbs
->tag
, op
->nbs
->n_tag
);
707 sbrec_port_binding_set_mac(op
->sb
, (const char **) op
->nbs
->addresses
,
708 op
->nbs
->n_addresses
);
712 /* Updates the southbound Port_Binding table so that it contains the logical
713 * ports specified by the northbound database.
715 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
716 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
719 build_ports(struct northd_context
*ctx
, struct hmap
*datapaths
,
722 struct ovs_list sb_only
, nb_only
, both
;
724 join_logical_ports(ctx
, datapaths
, ports
, &sb_only
, &nb_only
, &both
);
726 /* For logical ports that are in both databases, update the southbound
727 * record based on northbound data. Also index the in-use tunnel_keys. */
728 struct ovn_port
*op
, *next
;
729 LIST_FOR_EACH_SAFE (op
, next
, list
, &both
) {
730 ovn_port_update_sbrec(op
);
732 add_tnlid(&op
->od
->port_tnlids
, op
->sb
->tunnel_key
);
733 if (op
->sb
->tunnel_key
> op
->od
->port_key_hint
) {
734 op
->od
->port_key_hint
= op
->sb
->tunnel_key
;
738 /* Add southbound record for each unmatched northbound record. */
739 LIST_FOR_EACH_SAFE (op
, next
, list
, &nb_only
) {
740 uint16_t tunnel_key
= ovn_port_allocate_key(op
->od
);
745 op
->sb
= sbrec_port_binding_insert(ctx
->ovnsb_txn
);
746 ovn_port_update_sbrec(op
);
748 sbrec_port_binding_set_logical_port(op
->sb
, op
->key
);
749 sbrec_port_binding_set_tunnel_key(op
->sb
, tunnel_key
);
752 /* Delete southbound records without northbound matches. */
753 LIST_FOR_EACH_SAFE(op
, next
, list
, &sb_only
) {
754 list_remove(&op
->list
);
755 sbrec_port_binding_delete(op
->sb
);
756 ovn_port_destroy(ports
, op
);
760 #define OVN_MIN_MULTICAST 32768
761 #define OVN_MAX_MULTICAST 65535
763 struct multicast_group
{
765 uint16_t key
; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
768 #define MC_FLOOD "_MC_flood"
769 static const struct multicast_group mc_flood
= { MC_FLOOD
, 65535 };
771 #define MC_UNKNOWN "_MC_unknown"
772 static const struct multicast_group mc_unknown
= { MC_UNKNOWN
, 65534 };
775 multicast_group_equal(const struct multicast_group
*a
,
776 const struct multicast_group
*b
)
778 return !strcmp(a
->name
, b
->name
) && a
->key
== b
->key
;
781 /* Multicast group entry. */
782 struct ovn_multicast
{
783 struct hmap_node hmap_node
; /* Index on 'datapath' and 'key'. */
784 struct ovn_datapath
*datapath
;
785 const struct multicast_group
*group
;
787 struct ovn_port
**ports
;
788 size_t n_ports
, allocated_ports
;
792 ovn_multicast_hash(const struct ovn_datapath
*datapath
,
793 const struct multicast_group
*group
)
795 return hash_pointer(datapath
, group
->key
);
798 static struct ovn_multicast
*
799 ovn_multicast_find(struct hmap
*mcgroups
, struct ovn_datapath
*datapath
,
800 const struct multicast_group
*group
)
802 struct ovn_multicast
*mc
;
804 HMAP_FOR_EACH_WITH_HASH (mc
, hmap_node
,
805 ovn_multicast_hash(datapath
, group
), mcgroups
) {
806 if (mc
->datapath
== datapath
807 && multicast_group_equal(mc
->group
, group
)) {
815 ovn_multicast_add(struct hmap
*mcgroups
, const struct multicast_group
*group
,
816 struct ovn_port
*port
)
818 struct ovn_datapath
*od
= port
->od
;
819 struct ovn_multicast
*mc
= ovn_multicast_find(mcgroups
, od
, group
);
821 mc
= xmalloc(sizeof *mc
);
822 hmap_insert(mcgroups
, &mc
->hmap_node
, ovn_multicast_hash(od
, group
));
826 mc
->allocated_ports
= 4;
827 mc
->ports
= xmalloc(mc
->allocated_ports
* sizeof *mc
->ports
);
829 if (mc
->n_ports
>= mc
->allocated_ports
) {
830 mc
->ports
= x2nrealloc(mc
->ports
, &mc
->allocated_ports
,
833 mc
->ports
[mc
->n_ports
++] = port
;
837 ovn_multicast_destroy(struct hmap
*mcgroups
, struct ovn_multicast
*mc
)
840 hmap_remove(mcgroups
, &mc
->hmap_node
);
847 ovn_multicast_update_sbrec(const struct ovn_multicast
*mc
,
848 const struct sbrec_multicast_group
*sb
)
850 struct sbrec_port_binding
**ports
= xmalloc(mc
->n_ports
* sizeof *ports
);
851 for (size_t i
= 0; i
< mc
->n_ports
; i
++) {
852 ports
[i
] = CONST_CAST(struct sbrec_port_binding
*, mc
->ports
[i
]->sb
);
854 sbrec_multicast_group_set_ports(sb
, ports
, mc
->n_ports
);
858 /* Logical flow generation.
860 * This code generates the Logical_Flow table in the southbound database, as a
861 * function of most of the northbound database.
865 struct hmap_node hmap_node
;
867 struct ovn_datapath
*od
;
868 enum ovn_stage stage
;
875 ovn_lflow_hash(const struct ovn_lflow
*lflow
)
877 size_t hash
= uuid_hash(&lflow
->od
->key
);
878 hash
= hash_2words((lflow
->stage
<< 16) | lflow
->priority
, hash
);
879 hash
= hash_string(lflow
->match
, hash
);
880 return hash_string(lflow
->actions
, hash
);
884 ovn_lflow_equal(const struct ovn_lflow
*a
, const struct ovn_lflow
*b
)
886 return (a
->od
== b
->od
887 && a
->stage
== b
->stage
888 && a
->priority
== b
->priority
889 && !strcmp(a
->match
, b
->match
)
890 && !strcmp(a
->actions
, b
->actions
));
894 ovn_lflow_init(struct ovn_lflow
*lflow
, struct ovn_datapath
*od
,
895 enum ovn_stage stage
, uint16_t priority
,
896 char *match
, char *actions
)
899 lflow
->stage
= stage
;
900 lflow
->priority
= priority
;
901 lflow
->match
= match
;
902 lflow
->actions
= actions
;
905 /* Adds a row with the specified contents to the Logical_Flow table. */
907 ovn_lflow_add(struct hmap
*lflow_map
, struct ovn_datapath
*od
,
908 enum ovn_stage stage
, uint16_t priority
,
909 const char *match
, const char *actions
)
911 struct ovn_lflow
*lflow
= xmalloc(sizeof *lflow
);
912 ovn_lflow_init(lflow
, od
, stage
, priority
,
913 xstrdup(match
), xstrdup(actions
));
914 hmap_insert(lflow_map
, &lflow
->hmap_node
, ovn_lflow_hash(lflow
));
917 static struct ovn_lflow
*
918 ovn_lflow_find(struct hmap
*lflows
, struct ovn_datapath
*od
,
919 enum ovn_stage stage
, uint16_t priority
,
920 const char *match
, const char *actions
)
922 struct ovn_lflow target
;
923 ovn_lflow_init(&target
, od
, stage
, priority
,
924 CONST_CAST(char *, match
), CONST_CAST(char *, actions
));
926 struct ovn_lflow
*lflow
;
927 HMAP_FOR_EACH_WITH_HASH (lflow
, hmap_node
, ovn_lflow_hash(&target
),
929 if (ovn_lflow_equal(lflow
, &target
)) {
937 ovn_lflow_destroy(struct hmap
*lflows
, struct ovn_lflow
*lflow
)
940 hmap_remove(lflows
, &lflow
->hmap_node
);
942 free(lflow
->actions
);
947 struct ipv4_netaddr
{
952 struct ipv6_netaddr
{
953 struct in6_addr addr
;
957 struct lport_addresses
{
960 struct ipv4_netaddr
*ipv4_addrs
;
962 struct ipv6_netaddr
*ipv6_addrs
;
966 * Extracts the mac, ipv4 and ipv6 addresses from the input param 'address'
967 * which should be of the format 'MAC [IP1 IP2 ..]' where IPn should be
968 * a valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
969 * 'ipv6_addrs' fields of input param 'laddrs'.
970 * The caller has to free the 'ipv4_addrs' and 'ipv6_addrs' fields.
971 * If input param 'store_ipv6' is true only then extracted ipv6 addresses
972 * are stored in 'ipv6_addrs' fields.
973 * Return true if at least 'MAC' is found in 'address', false otherwise.
975 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
976 * 30.0.0.3/23' and 'store_ipv6' = true
977 * then returns true with laddrs->n_ipv4_addrs = 2, laddrs->n_ipv6_addrs = 1.
980 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
981 * 30.0.0.3/23' and 'store_ipv6' = false
982 * then returns true with laddrs->n_ipv4_addrs = 2, laddrs->n_ipv6_addrs = 0.
984 * Eg 3. If 'address' = '00:00:00:00:00:01 10.0.0.4 addr 30.0.0.4', then
985 * returns true with laddrs->n_ipv4_addrs = 1 and laddrs->n_ipv6_addrs = 0.
988 extract_lport_addresses(char *address
, struct lport_addresses
*laddrs
,
993 char *buf_end
= buf
+ strlen(address
);
994 if (!ovs_scan_len(buf
, &buf_index
, ETH_ADDR_SCAN_FMT
,
995 ETH_ADDR_SCAN_ARGS(laddrs
->ea
))) {
1000 struct in6_addr ip6
;
1004 laddrs
->n_ipv4_addrs
= 0;
1005 laddrs
->n_ipv6_addrs
= 0;
1006 laddrs
->ipv4_addrs
= NULL
;
1007 laddrs
->ipv6_addrs
= NULL
;
1009 /* Loop through the buffer and extract the IPv4/IPv6 addresses
1010 * and store in the 'laddrs'. Break the loop if invalid data is found.
1013 while (buf
< buf_end
) {
1015 error
= ip_parse_cidr_len(buf
, &buf_index
, &ip4
, &plen
);
1017 laddrs
->n_ipv4_addrs
++;
1018 laddrs
->ipv4_addrs
= xrealloc(
1020 sizeof (struct ipv4_netaddr
) * laddrs
->n_ipv4_addrs
);
1021 laddrs
->ipv4_addrs
[laddrs
->n_ipv4_addrs
- 1].addr
= ip4
;
1022 laddrs
->ipv4_addrs
[laddrs
->n_ipv4_addrs
- 1].plen
= plen
;
1027 error
= ipv6_parse_cidr_len(buf
, &buf_index
, &ip6
, &plen
);
1028 if (!error
&& store_ipv6
) {
1029 laddrs
->n_ipv6_addrs
++;
1030 laddrs
->ipv6_addrs
= xrealloc(
1032 sizeof(struct ipv6_netaddr
) * laddrs
->n_ipv6_addrs
);
1033 memcpy(&laddrs
->ipv6_addrs
[laddrs
->n_ipv6_addrs
- 1].addr
, &ip6
,
1034 sizeof(struct in6_addr
));
1035 laddrs
->ipv6_addrs
[laddrs
->n_ipv6_addrs
- 1].plen
= plen
;
1039 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1040 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in address", address
);
1050 /* Appends port security constraints on L2 address field 'eth_addr_field'
1051 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
1052 * 'n_port_security' elements, is the collection of port_security constraints
1053 * from an OVN_NB Logical_Port row. */
1055 build_port_security_l2(const char *eth_addr_field
,
1056 char **port_security
, size_t n_port_security
,
1059 size_t base_len
= match
->length
;
1060 ds_put_format(match
, " && %s == {", eth_addr_field
);
1063 for (size_t i
= 0; i
< n_port_security
; i
++) {
1066 if (eth_addr_from_string(port_security
[i
], &ea
)) {
1067 ds_put_format(match
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(ea
));
1068 ds_put_char(match
, ' ');
1072 ds_chomp(match
, ' ');
1073 ds_put_cstr(match
, "}");
1076 match
->length
= base_len
;
1081 build_port_security_ipv6_nd_flow(
1082 struct ds
*match
, struct eth_addr ea
, struct ipv6_netaddr
*ipv6_addrs
,
1085 ds_put_format(match
, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT
" || "
1086 "nd.sll == "ETH_ADDR_FMT
") || ((nd.tll == "ETH_ADDR_FMT
" || "
1087 "nd.tll == "ETH_ADDR_FMT
")", ETH_ADDR_ARGS(eth_addr_zero
),
1088 ETH_ADDR_ARGS(ea
), ETH_ADDR_ARGS(eth_addr_zero
),
1090 if (!n_ipv6_addrs
) {
1091 ds_put_cstr(match
, "))");
1095 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
1096 struct in6_addr lla
;
1097 in6_generate_lla(ea
, &lla
);
1098 memset(ip6_str
, 0, sizeof(ip6_str
));
1099 ipv6_string_mapped(ip6_str
, &lla
);
1100 ds_put_format(match
, " && (nd.target == %s", ip6_str
);
1102 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
1103 memset(ip6_str
, 0, sizeof(ip6_str
));
1104 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
1105 ds_put_format(match
, " || nd.target == %s", ip6_str
);
1108 ds_put_format(match
, ")))");
1112 build_port_security_ipv6_flow(
1113 enum ovn_pipeline pipeline
, struct ds
*match
, struct eth_addr ea
,
1114 struct ipv6_netaddr
*ipv6_addrs
, int n_ipv6_addrs
)
1116 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
1118 ds_put_format(match
, " && %s == {",
1119 pipeline
== P_IN
? "ip6.src" : "ip6.dst");
1121 /* Allow link-local address. */
1122 struct in6_addr lla
;
1123 in6_generate_lla(ea
, &lla
);
1124 ipv6_string_mapped(ip6_str
, &lla
);
1125 ds_put_format(match
, "%s, ", ip6_str
);
1127 /* Allow ip6.src=:: and ip6.dst=ff00::/8 for ND packets */
1128 ds_put_cstr(match
, pipeline
== P_IN
? "::" : "ff00::/8");
1129 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
1130 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
1131 ds_put_format(match
, ", %s", ip6_str
);
1133 ds_put_cstr(match
, "}");
1137 * Build port security constraints on ARP and IPv6 ND fields
1138 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1140 * For each port security of the logical port, following
1141 * logical flows are added
1142 * - If the port security has no IP (both IPv4 and IPv6) or
1143 * if it has IPv4 address(es)
1144 * - Priority 90 flow to allow ARP packets for known MAC addresses
1145 * in the eth.src and arp.sha fields. If the port security
1146 * has IPv4 addresses, allow known IPv4 addresses in the arp.spa field.
1148 * - If the port security has no IP (both IPv4 and IPv6) or
1149 * if it has IPv6 address(es)
1150 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1151 * in the eth.src and nd.sll/nd.tll fields. If the port security
1152 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1153 * for IPv6 Neighbor Advertisement packet.
1155 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1158 build_port_security_nd(struct ovn_port
*op
, struct hmap
*lflows
)
1160 for (size_t i
= 0; i
< op
->nbs
->n_port_security
; i
++) {
1161 struct lport_addresses ps
;
1162 if (!extract_lport_addresses(op
->nbs
->port_security
[i
], &ps
, true)) {
1163 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1164 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in port security. No MAC"
1165 " address found", op
->nbs
->port_security
[i
]);
1169 bool no_ip
= !(ps
.n_ipv4_addrs
|| ps
.n_ipv6_addrs
);
1170 struct ds match
= DS_EMPTY_INITIALIZER
;
1172 if (ps
.n_ipv4_addrs
|| no_ip
) {
1174 &match
, "inport == %s && eth.src == "ETH_ADDR_FMT
" && arp.sha == "
1175 ETH_ADDR_FMT
, op
->json_key
, ETH_ADDR_ARGS(ps
.ea
),
1176 ETH_ADDR_ARGS(ps
.ea
));
1178 if (ps
.n_ipv4_addrs
) {
1179 ds_put_cstr(&match
, " && (");
1180 for (size_t i
= 0; i
< ps
.n_ipv4_addrs
; i
++) {
1181 ds_put_format(&match
, "arp.spa == "IP_FMT
" || ",
1182 IP_ARGS(ps
.ipv4_addrs
[i
].addr
));
1184 ds_chomp(&match
, ' ');
1185 ds_chomp(&match
, '|');
1186 ds_chomp(&match
, '|');
1187 ds_put_cstr(&match
, ")");
1189 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
1190 ds_cstr(&match
), "next;");
1194 if (ps
.n_ipv6_addrs
|| no_ip
) {
1196 ds_put_format(&match
, "inport == %s && eth.src == "ETH_ADDR_FMT
,
1197 op
->json_key
, ETH_ADDR_ARGS(ps
.ea
));
1198 build_port_security_ipv6_nd_flow(&match
, ps
.ea
, ps
.ipv6_addrs
,
1200 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
1201 ds_cstr(&match
), "next;");
1204 free(ps
.ipv4_addrs
);
1205 free(ps
.ipv6_addrs
);
1208 char *match
= xasprintf("inport == %s && (arp || nd)", op
->json_key
);
1209 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 80,
1215 * Build port security constraints on IPv4 and IPv6 src and dst fields
1216 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1218 * For each port security of the logical port, following
1219 * logical flows are added
1220 * - If the port security has IPv4 addresses,
1221 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1223 * - If the port security has IPv6 addresses,
1224 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1226 * - If the port security has IPv4 addresses or IPv6 addresses or both
1227 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
1230 build_port_security_ip(enum ovn_pipeline pipeline
, struct ovn_port
*op
,
1231 struct hmap
*lflows
)
1233 char *port_direction
;
1234 enum ovn_stage stage
;
1235 if (pipeline
== P_IN
) {
1236 port_direction
= "inport";
1237 stage
= S_SWITCH_IN_PORT_SEC_IP
;
1239 port_direction
= "outport";
1240 stage
= S_SWITCH_OUT_PORT_SEC_IP
;
1243 for (size_t i
= 0; i
< op
->nbs
->n_port_security
; i
++) {
1244 struct lport_addresses ps
;
1245 if (!extract_lport_addresses(op
->nbs
->port_security
[i
], &ps
, true)) {
1249 if (!(ps
.n_ipv4_addrs
|| ps
.n_ipv6_addrs
)) {
1253 if (ps
.n_ipv4_addrs
) {
1254 struct ds match
= DS_EMPTY_INITIALIZER
;
1255 if (pipeline
== P_IN
) {
1256 ds_put_format(&match
, "inport == %s && eth.src == "ETH_ADDR_FMT
1257 " && ip4.src == {0.0.0.0, ", op
->json_key
,
1258 ETH_ADDR_ARGS(ps
.ea
));
1260 ds_put_format(&match
, "outport == %s && eth.dst == "ETH_ADDR_FMT
1261 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
1262 op
->json_key
, ETH_ADDR_ARGS(ps
.ea
));
1265 for (int i
= 0; i
< ps
.n_ipv4_addrs
; i
++) {
1266 ds_put_format(&match
, IP_FMT
", ", IP_ARGS(ps
.ipv4_addrs
[i
].addr
));
1269 /* Replace ", " by "}". */
1270 ds_chomp(&match
, ' ');
1271 ds_chomp(&match
, ',');
1272 ds_put_cstr(&match
, "}");
1273 ovn_lflow_add(lflows
, op
->od
, stage
, 90, ds_cstr(&match
), "next;");
1275 free(ps
.ipv4_addrs
);
1278 if (ps
.n_ipv6_addrs
) {
1279 struct ds match
= DS_EMPTY_INITIALIZER
;
1280 ds_put_format(&match
, "%s == %s && %s == "ETH_ADDR_FMT
"",
1281 port_direction
, op
->json_key
,
1282 pipeline
== P_IN
? "eth.src" : "eth.dst",
1283 ETH_ADDR_ARGS(ps
.ea
));
1284 build_port_security_ipv6_flow(pipeline
, &match
, ps
.ea
,
1285 ps
.ipv6_addrs
, ps
.n_ipv6_addrs
);
1286 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
1287 ds_cstr(&match
), "next;");
1289 free(ps
.ipv6_addrs
);
1292 char *match
= xasprintf(
1293 "%s == %s && %s == "ETH_ADDR_FMT
" && ip", port_direction
,
1294 op
->json_key
, pipeline
== P_IN
? "eth.src" : "eth.dst",
1295 ETH_ADDR_ARGS(ps
.ea
));
1296 ovn_lflow_add(lflows
, op
->od
, stage
, 80, match
, "drop;");
1302 lport_is_enabled(const struct nbrec_logical_port
*lport
)
1304 return !lport
->enabled
|| *lport
->enabled
;
1308 lport_is_up(const struct nbrec_logical_port
*lport
)
1310 return !lport
->up
|| *lport
->up
;
1314 has_stateful_acl(struct ovn_datapath
*od
)
1316 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
1317 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
1318 if (!strcmp(acl
->action
, "allow-related")) {
1327 build_acls(struct ovn_datapath
*od
, struct hmap
*lflows
, struct hmap
*ports
)
1329 bool has_stateful
= has_stateful_acl(od
);
1330 struct ovn_port
*op
;
1331 struct ds match_in
, match_out
;
1333 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1334 * allowed by default. */
1335 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 0, "1", "next;");
1336 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 0, "1", "next;");
1338 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1339 * default. A related rule at priority 1 is added below if there
1340 * are any stateful ACLs in this datapath. */
1341 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 0, "1", "next;");
1342 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 0, "1", "next;");
1344 /* If there are any stateful ACL rules in this datapath, we must
1345 * send all IP packets through the conntrack action, which handles
1346 * defragmentation, in order to match L4 headers. */
1348 HMAP_FOR_EACH (op
, key_node
, ports
) {
1349 if (op
->od
== od
&& !strcmp(op
->nbs
->type
, "router")) {
1350 /* Can't use ct() for router ports. Consider the following configuration:
1351 lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB,
1352 For a ping from lp1 to lp2, First, the response will go through ct()
1353 with a zone for lp2 in the ls2 ingress pipeline on hostB.
1354 That ct zone knows about this connection. Next, it goes through ct()
1355 with the zone for the router port in the egress pipeline of ls2 on hostB.
1356 This zone does not know about the connection, as the icmp request
1357 went through the logical router on hostA, not hostB. This would only work
1358 with distributed conntrack state across all chassis. */
1361 ds_init(&match_out
);
1362 ds_put_format(&match_in
, "ip && inport == %s", op
->json_key
);
1363 ds_put_format(&match_out
, "ip && outport == %s", op
->json_key
);
1364 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110, ds_cstr(&match_in
), "next;");
1365 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110, ds_cstr(&match_out
), "next;");
1367 ds_destroy(&match_in
);
1368 ds_destroy(&match_out
);
1372 /* Ingress and Egress Pre-ACL Table (Priority 100).
1374 * Regardless of whether the ACL is "from-lport" or "to-lport",
1375 * we need rules in both the ingress and egress table, because
1376 * the return traffic needs to be followed. */
1377 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 100, "ip", "ct_next;");
1378 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 100, "ip", "ct_next;");
1380 /* Ingress and Egress ACL Table (Priority 1).
1382 * By default, traffic is allowed. This is partially handled by
1383 * the Priority 0 ACL flows added earlier, but we also need to
1384 * commit IP flows. This is because, while the initiator's
1385 * direction may not have any stateful rules, the server's may
1386 * and then its return traffic would not have an associated
1387 * conntrack entry and would return "+invalid". */
1388 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 1, "ip",
1389 "ct_commit; next;");
1390 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 1, "ip",
1391 "ct_commit; next;");
1393 /* Ingress and Egress ACL Table (Priority 65535).
1395 * Always drop traffic that's in an invalid state. This is
1396 * enforced at a higher priority than ACLs can be defined. */
1397 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
1399 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
1402 /* Ingress and Egress ACL Table (Priority 65535).
1404 * Always allow traffic that is established to a committed
1405 * conntrack entry. This is enforced at a higher priority than
1406 * ACLs can be defined. */
1407 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
1408 "ct.est && !ct.rel && !ct.new && !ct.inv",
1410 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
1411 "ct.est && !ct.rel && !ct.new && !ct.inv",
1414 /* Ingress and Egress ACL Table (Priority 65535).
1416 * Always allow traffic that is related to an existing conntrack
1417 * entry. This is enforced at a higher priority than ACLs can
1420 * NOTE: This does not support related data sessions (eg,
1421 * a dynamically negotiated FTP data channel), but will allow
1422 * related traffic such as an ICMP Port Unreachable through
1423 * that's generated from a non-listening UDP port. */
1424 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
1425 "!ct.est && ct.rel && !ct.new && !ct.inv",
1427 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
1428 "!ct.est && ct.rel && !ct.new && !ct.inv",
1432 /* Ingress or Egress ACL Table (Various priorities). */
1433 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
1434 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
1435 bool ingress
= !strcmp(acl
->direction
, "from-lport") ? true :false;
1436 enum ovn_stage stage
= ingress
? S_SWITCH_IN_ACL
: S_SWITCH_OUT_ACL
;
1438 if (!strcmp(acl
->action
, "allow")) {
1439 /* If there are any stateful flows, we must even commit "allow"
1440 * actions. This is because, while the initiator's
1441 * direction may not have any stateful rules, the server's
1442 * may and then its return traffic would not have an
1443 * associated conntrack entry and would return "+invalid". */
1444 const char *actions
= has_stateful
? "ct_commit; next;" : "next;";
1445 ovn_lflow_add(lflows
, od
, stage
,
1446 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1447 acl
->match
, actions
);
1448 } else if (!strcmp(acl
->action
, "allow-related")) {
1449 struct ds match
= DS_EMPTY_INITIALIZER
;
1451 /* Commit the connection tracking entry, which allows all
1452 * other traffic related to this entry to flow due to the
1453 * 65535 priority flow defined earlier. */
1454 ds_put_format(&match
, "ct.new && (%s)", acl
->match
);
1455 ovn_lflow_add(lflows
, od
, stage
,
1456 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1457 ds_cstr(&match
), "ct_commit; next;");
1460 } else if (!strcmp(acl
->action
, "drop")) {
1461 ovn_lflow_add(lflows
, od
, stage
,
1462 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1463 acl
->match
, "drop;");
1464 } else if (!strcmp(acl
->action
, "reject")) {
1465 /* xxx Need to support "reject". */
1466 VLOG_INFO("reject is not a supported action");
1467 ovn_lflow_add(lflows
, od
, stage
,
1468 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
1469 acl
->match
, "drop;");
1475 build_lswitch_flows(struct hmap
*datapaths
, struct hmap
*ports
,
1476 struct hmap
*lflows
, struct hmap
*mcgroups
)
1478 /* This flow table structure is documented in ovn-northd(8), so please
1479 * update ovn-northd.8.xml if you change anything. */
1481 /* Build pre-ACL and ACL tables for both ingress and egress.
1482 * Ingress tables 3 and 4. Egress tables 0 and 1. */
1483 struct ovn_datapath
*od
;
1484 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1489 build_acls(od
, lflows
, ports
);
1492 /* Logical switch ingress table 0: Admission control framework (priority
1494 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1499 /* Logical VLANs not supported. */
1500 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "vlan.present",
1503 /* Broadcast/multicast source address is invalid. */
1504 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "eth.src[40]",
1507 /* Port security flows have priority 50 (see below) and will continue
1508 * to the next table if packet source is acceptable. */
1511 /* Logical switch ingress table 0: Ingress port security - L2
1513 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
1514 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
1516 struct ovn_port
*op
;
1517 HMAP_FOR_EACH (op
, key_node
, ports
) {
1522 if (!lport_is_enabled(op
->nbs
)) {
1523 /* Drop packets from disabled logical ports (since logical flow
1524 * tables are default-drop). */
1528 struct ds match
= DS_EMPTY_INITIALIZER
;
1529 ds_put_format(&match
, "inport == %s", op
->json_key
);
1530 build_port_security_l2(
1531 "eth.src", op
->nbs
->port_security
, op
->nbs
->n_port_security
,
1533 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_L2
, 50,
1534 ds_cstr(&match
), "next;");
1537 if (op
->nbs
->n_port_security
) {
1538 build_port_security_ip(P_IN
, op
, lflows
);
1539 build_port_security_nd(op
, lflows
);
1543 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
1545 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1550 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_ND
, 0, "1", "next;");
1551 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_IP
, 0, "1", "next;");
1554 /* Ingress table 5: ARP responder, skip requests coming from localnet ports.
1555 * (priority 100). */
1556 HMAP_FOR_EACH (op
, key_node
, ports
) {
1561 if (!strcmp(op
->nbs
->type
, "localnet")) {
1562 char *match
= xasprintf("inport == %s", op
->json_key
);
1563 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_RSP
, 100,
1569 /* Ingress table 5: ARP responder, reply for known IPs.
1571 HMAP_FOR_EACH (op
, key_node
, ports
) {
1577 * Add ARP reply flows if either the
1579 * - port type is router
1581 if (!lport_is_up(op
->nbs
) && strcmp(op
->nbs
->type
, "router")) {
1585 for (size_t i
= 0; i
< op
->nbs
->n_addresses
; i
++) {
1586 struct lport_addresses laddrs
;
1587 if (!extract_lport_addresses(op
->nbs
->addresses
[i
], &laddrs
,
1591 for (size_t j
= 0; j
< laddrs
.n_ipv4_addrs
; j
++) {
1592 char *match
= xasprintf(
1593 "arp.tpa == "IP_FMT
" && arp.op == 1",
1594 IP_ARGS(laddrs
.ipv4_addrs
[j
].addr
));
1595 char *actions
= xasprintf(
1596 "eth.dst = eth.src; "
1597 "eth.src = "ETH_ADDR_FMT
"; "
1598 "arp.op = 2; /* ARP reply */ "
1599 "arp.tha = arp.sha; "
1600 "arp.sha = "ETH_ADDR_FMT
"; "
1601 "arp.tpa = arp.spa; "
1602 "arp.spa = "IP_FMT
"; "
1603 "outport = inport; "
1604 "inport = \"\"; /* Allow sending out inport. */ "
1606 ETH_ADDR_ARGS(laddrs
.ea
),
1607 ETH_ADDR_ARGS(laddrs
.ea
),
1608 IP_ARGS(laddrs
.ipv4_addrs
[j
].addr
));
1609 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_RSP
, 50,
1615 free(laddrs
.ipv4_addrs
);
1619 /* Ingress table 5: ARP responder, by default goto next.
1621 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1626 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ARP_RSP
, 0, "1", "next;");
1629 /* Ingress table 6: Destination lookup, broadcast and multicast handling
1630 * (priority 100). */
1631 HMAP_FOR_EACH (op
, key_node
, ports
) {
1636 if (lport_is_enabled(op
->nbs
)) {
1637 ovn_multicast_add(mcgroups
, &mc_flood
, op
);
1640 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1645 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 100, "eth.mcast",
1646 "outport = \""MC_FLOOD
"\"; output;");
1649 /* Ingress table 6: Destination lookup, unicast handling (priority 50), */
1650 HMAP_FOR_EACH (op
, key_node
, ports
) {
1655 for (size_t i
= 0; i
< op
->nbs
->n_addresses
; i
++) {
1656 struct eth_addr mac
;
1658 if (eth_addr_from_string(op
->nbs
->addresses
[i
], &mac
)) {
1659 struct ds match
, actions
;
1662 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
1663 ETH_ADDR_ARGS(mac
));
1666 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
1667 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
1668 ds_cstr(&match
), ds_cstr(&actions
));
1669 ds_destroy(&actions
);
1671 } else if (!strcmp(op
->nbs
->addresses
[i
], "unknown")) {
1672 if (lport_is_enabled(op
->nbs
)) {
1673 ovn_multicast_add(mcgroups
, &mc_unknown
, op
);
1674 op
->od
->has_unknown
= true;
1677 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1680 "%s: invalid syntax '%s' in addresses column",
1681 op
->nbs
->name
, op
->nbs
->addresses
[i
]);
1686 /* Ingress table 6: Destination lookup for unknown MACs (priority 0). */
1687 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1692 if (od
->has_unknown
) {
1693 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 0, "1",
1694 "outport = \""MC_UNKNOWN
"\"; output;");
1698 /* Egress table 2: Egress port security - IP (priority 0)
1699 * Egress table 3: Egress port security - L2, multicast/broadcast (priority
1701 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1706 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_IP
, 0, "1", "next;");
1707 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_L2
, 100, "eth.mcast",
1711 /* Egress table 2: Egress port security - IP (priorities 90 and 80)
1712 * if port security enabled.
1714 * Egress table 3: Egress port security - L2 (priorities 50 and 150).
1716 * Priority 50 rules implement port security for enabled logical port.
1718 * Priority 150 rules drop packets to disabled logical ports, so that they
1719 * don't even receive multicast or broadcast packets. */
1720 HMAP_FOR_EACH (op
, key_node
, ports
) {
1725 struct ds match
= DS_EMPTY_INITIALIZER
;
1726 ds_put_format(&match
, "outport == %s", op
->json_key
);
1727 if (lport_is_enabled(op
->nbs
)) {
1728 build_port_security_l2("eth.dst", op
->nbs
->port_security
,
1729 op
->nbs
->n_port_security
, &match
);
1730 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 50,
1731 ds_cstr(&match
), "output;");
1733 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 150,
1734 ds_cstr(&match
), "drop;");
1739 if (op
->nbs
->n_port_security
) {
1740 build_port_security_ip(P_OUT
, op
, lflows
);
1746 lrport_is_enabled(const struct nbrec_logical_router_port
*lrport
)
1748 return !lrport
->enabled
|| *lrport
->enabled
;
1752 add_route(struct hmap
*lflows
, const struct ovn_port
*op
,
1753 ovs_be32 network
, ovs_be32 mask
, ovs_be32 gateway
)
1755 char *match
= xasprintf("ip4.dst == "IP_FMT
"/"IP_FMT
,
1756 IP_ARGS(network
), IP_ARGS(mask
));
1758 struct ds actions
= DS_EMPTY_INITIALIZER
;
1759 ds_put_cstr(&actions
, "ip.ttl--; reg0 = ");
1761 ds_put_format(&actions
, IP_FMT
, IP_ARGS(gateway
));
1763 ds_put_cstr(&actions
, "ip4.dst");
1765 ds_put_format(&actions
,
1768 "eth.src = "ETH_ADDR_FMT
"; "
1771 IP_ARGS(op
->ip
), ETH_ADDR_ARGS(op
->mac
), op
->json_key
);
1773 /* The priority here is calculated to implement longest-prefix-match
1775 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_ROUTING
,
1776 count_1bits(ntohl(mask
)), match
, ds_cstr(&actions
));
1777 ds_destroy(&actions
);
1782 build_lrouter_flows(struct hmap
*datapaths
, struct hmap
*ports
,
1783 struct hmap
*lflows
)
1785 /* This flow table structure is documented in ovn-northd(8), so please
1786 * update ovn-northd.8.xml if you change anything. */
1788 /* Logical router ingress table 0: Admission control framework. */
1789 struct ovn_datapath
*od
;
1790 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1795 /* Logical VLANs not supported.
1796 * Broadcast/multicast source address is invalid. */
1797 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ADMISSION
, 100,
1798 "vlan.present || eth.src[40]", "drop;");
1801 /* Logical router ingress table 0: match (priority 50). */
1802 struct ovn_port
*op
;
1803 HMAP_FOR_EACH (op
, key_node
, ports
) {
1808 if (!lrport_is_enabled(op
->nbr
)) {
1809 /* Drop packets from disabled logical ports (since logical flow
1810 * tables are default-drop). */
1814 char *match
= xasprintf(
1815 "(eth.mcast || eth.dst == "ETH_ADDR_FMT
") && inport == %s",
1816 ETH_ADDR_ARGS(op
->mac
), op
->json_key
);
1817 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ADMISSION
, 50,
1822 /* Logical router ingress table 1: IP Input. */
1823 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1828 /* L3 admission control: drop multicast and broadcast source, localhost
1829 * source or destination, and zero network source or destination
1830 * (priority 100). */
1831 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 100,
1833 "ip4.src == 255.255.255.255 || "
1834 "ip4.src == 127.0.0.0/8 || "
1835 "ip4.dst == 127.0.0.0/8 || "
1836 "ip4.src == 0.0.0.0/8 || "
1837 "ip4.dst == 0.0.0.0/8",
1840 /* ARP reply handling. Use ARP replies to populate the logical
1841 * router's ARP table. */
1842 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 90, "arp.op == 2",
1843 "put_arp(inport, arp.spa, arp.sha);");
1845 /* Drop Ethernet local broadcast. By definition this traffic should
1846 * not be forwarded.*/
1847 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 50,
1848 "eth.bcast", "drop;");
1850 /* Drop IP multicast. */
1851 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 50,
1852 "ip4.mcast", "drop;");
1856 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1857 char *match
= xasprintf("ip4 && ip.ttl == {0, 1}");
1858 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 30, match
, "drop;");
1861 /* Pass other traffic not already handled to the next table for
1863 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 0, "1", "next;");
1866 HMAP_FOR_EACH (op
, key_node
, ports
) {
1871 /* L3 admission control: drop packets that originate from an IP address
1872 * owned by the router or a broadcast address known to the router
1873 * (priority 100). */
1874 char *match
= xasprintf("ip4.src == {"IP_FMT
", "IP_FMT
"}",
1875 IP_ARGS(op
->ip
), IP_ARGS(op
->bcast
));
1876 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 100,
1880 /* ICMP echo reply. These flows reply to ICMP echo requests
1881 * received for the router's IP address. */
1883 "inport == %s && (ip4.dst == "IP_FMT
" || ip4.dst == "IP_FMT
") && "
1884 "icmp4.type == 8 && icmp4.code == 0",
1885 op
->json_key
, IP_ARGS(op
->ip
), IP_ARGS(op
->bcast
));
1886 char *actions
= xasprintf(
1887 "ip4.dst = ip4.src; "
1888 "ip4.src = "IP_FMT
"; "
1891 "inport = \"\"; /* Allow sending out inport. */ "
1894 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
1899 /* ARP reply. These flows reply to ARP requests for the router's own
1902 "inport == %s && arp.tpa == "IP_FMT
" && arp.op == 1",
1903 op
->json_key
, IP_ARGS(op
->ip
));
1904 actions
= xasprintf(
1905 "eth.dst = eth.src; "
1906 "eth.src = "ETH_ADDR_FMT
"; "
1907 "arp.op = 2; /* ARP reply */ "
1908 "arp.tha = arp.sha; "
1909 "arp.sha = "ETH_ADDR_FMT
"; "
1910 "arp.tpa = arp.spa; "
1911 "arp.spa = "IP_FMT
"; "
1913 "inport = \"\"; /* Allow sending out inport. */ "
1915 ETH_ADDR_ARGS(op
->mac
),
1916 ETH_ADDR_ARGS(op
->mac
),
1919 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
1924 /* Drop IP traffic to this router. */
1925 match
= xasprintf("ip4.dst == "IP_FMT
, IP_ARGS(op
->ip
));
1926 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 60,
1931 /* Logical router ingress table 2: IP Routing.
1933 * A packet that arrives at this table is an IP packet that should be
1934 * routed to the address in ip4.dst. This table sets outport to the correct
1935 * output port, eth.src to the output port's MAC address, and reg0 to the
1936 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
1937 * unchanged), and advances to the next table for ARP resolution. */
1938 HMAP_FOR_EACH (op
, key_node
, ports
) {
1943 add_route(lflows
, op
, op
->network
, op
->mask
, 0);
1945 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1950 if (od
->gateway
&& od
->gateway_port
) {
1951 add_route(lflows
, od
->gateway_port
, 0, 0, od
->gateway
);
1954 /* XXX destination unreachable */
1956 /* Logical router ingress table 3: ARP Resolution.
1958 * Any packet that reaches this table is an IP packet whose next-hop IP
1959 * address is in reg0. (ip4.dst is the final destination.) This table
1960 * resolves the IP address in reg0 into an output port in outport and an
1961 * Ethernet address in eth.dst. */
1962 HMAP_FOR_EACH (op
, key_node
, ports
) {
1964 /* XXX ARP for neighboring router */
1965 } else if (op
->od
->n_router_ports
) {
1966 for (size_t i
= 0; i
< op
->nbs
->n_addresses
; i
++) {
1967 struct lport_addresses laddrs
;
1968 if (!extract_lport_addresses(op
->nbs
->addresses
[i
], &laddrs
,
1973 for (size_t k
= 0; k
< laddrs
.n_ipv4_addrs
; k
++) {
1974 ovs_be32 ip
= laddrs
.ipv4_addrs
[k
].addr
;
1975 for (size_t j
= 0; j
< op
->od
->n_router_ports
; j
++) {
1976 /* Get the Logical_Router_Port that the Logical_Port is
1977 * connected to, as 'peer'. */
1978 const char *peer_name
= smap_get(
1979 &op
->od
->router_ports
[j
]->nbs
->options
,
1985 struct ovn_port
*peer
1986 = ovn_port_find(ports
, peer_name
);
1987 if (!peer
|| !peer
->nbr
) {
1991 /* Make sure that 'ip' is in 'peer''s network. */
1992 if ((ip
^ peer
->network
) & peer
->mask
) {
1996 char *match
= xasprintf(
1997 "outport == %s && reg0 == "IP_FMT
,
1998 peer
->json_key
, IP_ARGS(ip
));
1999 char *actions
= xasprintf("eth.dst = "ETH_ADDR_FMT
"; "
2001 ETH_ADDR_ARGS(laddrs
.ea
));
2002 ovn_lflow_add(lflows
, peer
->od
,
2003 S_ROUTER_IN_ARP_RESOLVE
,
2004 100, match
, actions
);
2011 free(laddrs
.ipv4_addrs
);
2015 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
2020 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 0, "1",
2021 "get_arp(outport, reg0); next;");
2024 /* Logical router ingress table 4: ARP request.
2026 * In the common case where the Ethernet destination has been resolved,
2027 * this table outputs the packet (priority 100). Otherwise, it composes
2028 * and sends an ARP request (priority 0). */
2029 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
2034 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 100,
2035 "eth.dst == 00:00:00:00:00:00",
2037 "eth.dst = ff:ff:ff:ff:ff:ff; "
2039 "arp.op = 1; " /* ARP request */
2042 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 0, "1", "output;");
2045 /* Logical router egress table 0: Delivery (priority 100).
2047 * Priority 100 rules deliver packets to enabled logical ports. */
2048 HMAP_FOR_EACH (op
, key_node
, ports
) {
2053 if (!lrport_is_enabled(op
->nbr
)) {
2054 /* Drop packets to disabled logical ports (since logical flow
2055 * tables are default-drop). */
2059 char *match
= xasprintf("outport == %s", op
->json_key
);
2060 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_OUT_DELIVERY
, 100,
2066 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2067 * constructing their contents based on the OVN_NB database. */
2069 build_lflows(struct northd_context
*ctx
, struct hmap
*datapaths
,
2072 struct hmap lflows
= HMAP_INITIALIZER(&lflows
);
2073 struct hmap mcgroups
= HMAP_INITIALIZER(&mcgroups
);
2075 build_lswitch_flows(datapaths
, ports
, &lflows
, &mcgroups
);
2076 build_lrouter_flows(datapaths
, ports
, &lflows
);
2078 /* Push changes to the Logical_Flow table to database. */
2079 const struct sbrec_logical_flow
*sbflow
, *next_sbflow
;
2080 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow
, next_sbflow
, ctx
->ovnsb_idl
) {
2081 struct ovn_datapath
*od
2082 = ovn_datapath_from_sbrec(datapaths
, sbflow
->logical_datapath
);
2084 sbrec_logical_flow_delete(sbflow
);
2088 enum ovn_datapath_type dp_type
= od
->nbs
? DP_SWITCH
: DP_ROUTER
;
2089 enum ovn_pipeline pipeline
2090 = !strcmp(sbflow
->pipeline
, "ingress") ? P_IN
: P_OUT
;
2091 struct ovn_lflow
*lflow
= ovn_lflow_find(
2092 &lflows
, od
, ovn_stage_build(dp_type
, pipeline
, sbflow
->table_id
),
2093 sbflow
->priority
, sbflow
->match
, sbflow
->actions
);
2095 ovn_lflow_destroy(&lflows
, lflow
);
2097 sbrec_logical_flow_delete(sbflow
);
2100 struct ovn_lflow
*lflow
, *next_lflow
;
2101 HMAP_FOR_EACH_SAFE (lflow
, next_lflow
, hmap_node
, &lflows
) {
2102 enum ovn_pipeline pipeline
= ovn_stage_get_pipeline(lflow
->stage
);
2103 uint8_t table
= ovn_stage_get_table(lflow
->stage
);
2105 sbflow
= sbrec_logical_flow_insert(ctx
->ovnsb_txn
);
2106 sbrec_logical_flow_set_logical_datapath(sbflow
, lflow
->od
->sb
);
2107 sbrec_logical_flow_set_pipeline(
2108 sbflow
, pipeline
== P_IN
? "ingress" : "egress");
2109 sbrec_logical_flow_set_table_id(sbflow
, table
);
2110 sbrec_logical_flow_set_priority(sbflow
, lflow
->priority
);
2111 sbrec_logical_flow_set_match(sbflow
, lflow
->match
);
2112 sbrec_logical_flow_set_actions(sbflow
, lflow
->actions
);
2114 const struct smap ids
= SMAP_CONST1(&ids
, "stage-name",
2115 ovn_stage_to_str(lflow
->stage
));
2116 sbrec_logical_flow_set_external_ids(sbflow
, &ids
);
2118 ovn_lflow_destroy(&lflows
, lflow
);
2120 hmap_destroy(&lflows
);
2122 /* Push changes to the Multicast_Group table to database. */
2123 const struct sbrec_multicast_group
*sbmc
, *next_sbmc
;
2124 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc
, next_sbmc
, ctx
->ovnsb_idl
) {
2125 struct ovn_datapath
*od
= ovn_datapath_from_sbrec(datapaths
,
2128 sbrec_multicast_group_delete(sbmc
);
2132 struct multicast_group group
= { .name
= sbmc
->name
,
2133 .key
= sbmc
->tunnel_key
};
2134 struct ovn_multicast
*mc
= ovn_multicast_find(&mcgroups
, od
, &group
);
2136 ovn_multicast_update_sbrec(mc
, sbmc
);
2137 ovn_multicast_destroy(&mcgroups
, mc
);
2139 sbrec_multicast_group_delete(sbmc
);
2142 struct ovn_multicast
*mc
, *next_mc
;
2143 HMAP_FOR_EACH_SAFE (mc
, next_mc
, hmap_node
, &mcgroups
) {
2144 sbmc
= sbrec_multicast_group_insert(ctx
->ovnsb_txn
);
2145 sbrec_multicast_group_set_datapath(sbmc
, mc
->datapath
->sb
);
2146 sbrec_multicast_group_set_name(sbmc
, mc
->group
->name
);
2147 sbrec_multicast_group_set_tunnel_key(sbmc
, mc
->group
->key
);
2148 ovn_multicast_update_sbrec(mc
, sbmc
);
2149 ovn_multicast_destroy(&mcgroups
, mc
);
2151 hmap_destroy(&mcgroups
);
2155 ovnnb_db_run(struct northd_context
*ctx
)
2157 if (!ctx
->ovnsb_txn
) {
2160 VLOG_DBG("ovn-nb db contents may have changed.");
2161 struct hmap datapaths
, ports
;
2162 build_datapaths(ctx
, &datapaths
);
2163 build_ports(ctx
, &datapaths
, &ports
);
2164 build_lflows(ctx
, &datapaths
, &ports
);
2166 struct ovn_datapath
*dp
, *next_dp
;
2167 HMAP_FOR_EACH_SAFE (dp
, next_dp
, key_node
, &datapaths
) {
2168 ovn_datapath_destroy(&datapaths
, dp
);
2170 hmap_destroy(&datapaths
);
2172 struct ovn_port
*port
, *next_port
;
2173 HMAP_FOR_EACH_SAFE (port
, next_port
, key_node
, &ports
) {
2174 ovn_port_destroy(&ports
, port
);
2176 hmap_destroy(&ports
);
2180 * The only change we get notified about is if the 'chassis' column of the
2181 * 'Port_Binding' table changes. When this column is not empty, it means we
2182 * need to set the corresponding logical port as 'up' in the northbound DB.
2185 ovnsb_db_run(struct northd_context
*ctx
)
2187 if (!ctx
->ovnnb_txn
) {
2190 struct hmap lports_hmap
;
2191 const struct sbrec_port_binding
*sb
;
2192 const struct nbrec_logical_port
*nb
;
2194 struct lport_hash_node
{
2195 struct hmap_node node
;
2196 const struct nbrec_logical_port
*nb
;
2197 } *hash_node
, *hash_node_next
;
2199 VLOG_DBG("Recalculating port up states for ovn-nb db.");
2201 hmap_init(&lports_hmap
);
2203 NBREC_LOGICAL_PORT_FOR_EACH(nb
, ctx
->ovnnb_idl
) {
2204 hash_node
= xzalloc(sizeof *hash_node
);
2206 hmap_insert(&lports_hmap
, &hash_node
->node
, hash_string(nb
->name
, 0));
2209 SBREC_PORT_BINDING_FOR_EACH(sb
, ctx
->ovnsb_idl
) {
2211 HMAP_FOR_EACH_WITH_HASH(hash_node
, node
,
2212 hash_string(sb
->logical_port
, 0),
2214 if (!strcmp(sb
->logical_port
, hash_node
->nb
->name
)) {
2221 /* The logical port doesn't exist for this port binding. This can
2222 * happen under normal circumstances when ovn-northd hasn't gotten
2223 * around to pruning the Port_Binding yet. */
2227 if (sb
->chassis
&& (!nb
->up
|| !*nb
->up
)) {
2229 nbrec_logical_port_set_up(nb
, &up
, 1);
2230 } else if (!sb
->chassis
&& (!nb
->up
|| *nb
->up
)) {
2232 nbrec_logical_port_set_up(nb
, &up
, 1);
2236 HMAP_FOR_EACH_SAFE(hash_node
, hash_node_next
, node
, &lports_hmap
) {
2237 hmap_remove(&lports_hmap
, &hash_node
->node
);
2240 hmap_destroy(&lports_hmap
);
2244 static char *default_db_
;
2250 default_db_
= xasprintf("unix:%s/db.sock", ovs_rundir());
2256 parse_options(int argc OVS_UNUSED
, char *argv
[] OVS_UNUSED
)
2259 DAEMON_OPTION_ENUMS
,
2262 static const struct option long_options
[] = {
2263 {"ovnsb-db", required_argument
, NULL
, 'd'},
2264 {"ovnnb-db", required_argument
, NULL
, 'D'},
2265 {"help", no_argument
, NULL
, 'h'},
2266 {"options", no_argument
, NULL
, 'o'},
2267 {"version", no_argument
, NULL
, 'V'},
2268 DAEMON_LONG_OPTIONS
,
2270 STREAM_SSL_LONG_OPTIONS
,
2273 char *short_options
= ovs_cmdl_long_options_to_short_options(long_options
);
2278 c
= getopt_long(argc
, argv
, short_options
, long_options
, NULL
);
2284 DAEMON_OPTION_HANDLERS
;
2285 VLOG_OPTION_HANDLERS
;
2286 STREAM_SSL_OPTION_HANDLERS
;
2301 ovs_cmdl_print_options(long_options
);
2305 ovs_print_version(0, 0);
2314 ovnsb_db
= default_db();
2318 ovnnb_db
= default_db();
2321 free(short_options
);
/* Registers 'column' with 'idl' through ovsdb_idl_add_column() and then
 * calls ovsdb_idl_omit_alert() on the same column. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    /* Order matters: the column is added before its alerts are omitted. */
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
2333 main(int argc
, char *argv
[])
2335 int res
= EXIT_SUCCESS
;
2336 struct unixctl_server
*unixctl
;
2340 fatal_ignore_sigpipe();
2341 set_program_name(argv
[0]);
2342 service_start(&argc
, &argv
);
2343 parse_options(argc
, argv
);
2345 daemonize_start(false);
2347 retval
= unixctl_server_create(NULL
, &unixctl
);
2351 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit
, &exiting
);
2353 daemonize_complete();
2358 /* We want to detect all changes to the ovn-nb db. */
2359 struct ovsdb_idl_loop ovnnb_idl_loop
= OVSDB_IDL_LOOP_INITIALIZER(
2360 ovsdb_idl_create(ovnnb_db
, &nbrec_idl_class
, true, true));
2362 struct ovsdb_idl_loop ovnsb_idl_loop
= OVSDB_IDL_LOOP_INITIALIZER(
2363 ovsdb_idl_create(ovnsb_db
, &sbrec_idl_class
, false, true));
2365 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_logical_flow
);
2366 add_column_noalert(ovnsb_idl_loop
.idl
,
2367 &sbrec_logical_flow_col_logical_datapath
);
2368 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_pipeline
);
2369 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_table_id
);
2370 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_priority
);
2371 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_match
);
2372 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_actions
);
2374 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_multicast_group
);
2375 add_column_noalert(ovnsb_idl_loop
.idl
,
2376 &sbrec_multicast_group_col_datapath
);
2377 add_column_noalert(ovnsb_idl_loop
.idl
,
2378 &sbrec_multicast_group_col_tunnel_key
);
2379 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_multicast_group_col_name
);
2380 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_multicast_group_col_ports
);
2382 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_datapath_binding
);
2383 add_column_noalert(ovnsb_idl_loop
.idl
,
2384 &sbrec_datapath_binding_col_tunnel_key
);
2385 add_column_noalert(ovnsb_idl_loop
.idl
,
2386 &sbrec_datapath_binding_col_external_ids
);
2388 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_port_binding
);
2389 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_datapath
);
2390 add_column_noalert(ovnsb_idl_loop
.idl
,
2391 &sbrec_port_binding_col_logical_port
);
2392 add_column_noalert(ovnsb_idl_loop
.idl
,
2393 &sbrec_port_binding_col_tunnel_key
);
2394 add_column_noalert(ovnsb_idl_loop
.idl
,
2395 &sbrec_port_binding_col_parent_port
);
2396 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_tag
);
2397 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_type
);
2398 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_options
);
2399 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_mac
);
2400 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_chassis
);
2405 struct northd_context ctx
= {
2406 .ovnnb_idl
= ovnnb_idl_loop
.idl
,
2407 .ovnnb_txn
= ovsdb_idl_loop_run(&ovnnb_idl_loop
),
2408 .ovnsb_idl
= ovnsb_idl_loop
.idl
,
2409 .ovnsb_txn
= ovsdb_idl_loop_run(&ovnsb_idl_loop
),
2415 unixctl_server_run(unixctl
);
2416 unixctl_server_wait(unixctl
);
2418 poll_immediate_wake();
2420 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop
);
2421 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop
);
2424 if (should_service_stop()) {
2429 unixctl_server_destroy(unixctl
);
2430 ovsdb_idl_loop_destroy(&ovnnb_idl_loop
);
2431 ovsdb_idl_loop_destroy(&ovnsb_idl_loop
);
2439 ovn_northd_exit(struct unixctl_conn
*conn
, int argc OVS_UNUSED
,
2440 const char *argv
[] OVS_UNUSED
, void *exiting_
)
2442 bool *exiting
= exiting_
;
2445 unixctl_command_reply(conn
, NULL
);