2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
22 #include "command-line.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
39 #include "openvswitch/poll-loop.h"
43 #include "stream-ssl.h"
47 #include "openvswitch/vlog.h"
49 VLOG_DEFINE_THIS_MODULE(ovn_northd
);
51 static unixctl_cb_func ovn_northd_exit
;
53 struct northd_context
{
54 struct ovsdb_idl
*ovnnb_idl
;
55 struct ovsdb_idl
*ovnsb_idl
;
56 struct ovsdb_idl_txn
*ovnnb_txn
;
57 struct ovsdb_idl_txn
*ovnsb_txn
;
60 static const char *ovnnb_db
;
61 static const char *ovnsb_db
;
62 static const char *unixctl_path
;
64 #define MAC_ADDR_PREFIX 0x0A0000000000ULL
65 #define MAC_ADDR_SPACE 0xffffff
67 /* MAC address management (macam) table of "struct eth_addr"s, that holds the
68 * MAC addresses allocated by the OVN ipam module. */
69 static struct hmap macam
= HMAP_INITIALIZER(&macam
);
71 #define MAX_OVN_TAGS 4096
73 /* Pipeline stages. */
75 /* The two pipelines in an OVN logical flow table. */
77 P_IN
, /* Ingress pipeline. */
78 P_OUT
/* Egress pipeline. */
81 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
82 enum ovn_datapath_type
{
83 DP_SWITCH
, /* OVN logical switch. */
84 DP_ROUTER
/* OVN logical router. */
87 /* Returns an "enum ovn_stage" built from the arguments.
89 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
90 * functions can't be used in enums or switch cases.) */
91 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
92 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
94 /* A stage within an OVN logical switch or router.
96 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
97 * or router, whether the stage is part of the ingress or egress pipeline, and
98 * the table within that pipeline. The first three components are combined to
99 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
100 * S_ROUTER_OUT_DELIVERY. */
102 #define PIPELINE_STAGES \
103 /* Logical switch ingress stages. */ \
104 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
105 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
106 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
107 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
108 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
109 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
110 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
111 PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
112 PIPELINE_STAGE(SWITCH, IN, QOS_METER, 8, "ls_in_qos_meter") \
113 PIPELINE_STAGE(SWITCH, IN, LB, 9, "ls_in_lb") \
114 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 10, "ls_in_stateful") \
115 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 11, "ls_in_arp_rsp") \
116 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 12, "ls_in_dhcp_options") \
117 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 13, "ls_in_dhcp_response") \
118 PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 14, "ls_in_dns_lookup") \
119 PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 15, "ls_in_dns_response") \
120 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 16, "ls_in_l2_lkup") \
122 /* Logical switch egress stages. */ \
123 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
124 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
125 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
126 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
127 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
128 PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
129 PIPELINE_STAGE(SWITCH, OUT, QOS_METER, 6, "ls_out_qos_meter") \
130 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 7, "ls_out_stateful") \
131 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 8, "ls_out_port_sec_ip") \
132 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 9, "ls_out_port_sec_l2") \
134 /* Logical router ingress stages. */ \
135 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
136 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
137 PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
138 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
139 PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
140 PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 5, "lr_in_nd_ra_options") \
141 PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
142 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \
143 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 8, "lr_in_arp_resolve") \
144 PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 9, "lr_in_gw_redirect") \
145 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 10, "lr_in_arp_request") \
147 /* Logical router egress stages. */ \
148 PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
149 PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \
150 PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \
151 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery")
153 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
154 S_##DP_TYPE##_##PIPELINE##_##STAGE \
155 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
157 #undef PIPELINE_STAGE
160 /* Due to various hard-coded priorities needed to implement ACLs, the
161 * northbound database supports a smaller range of ACL priorities than
162 * are available to logical flows. This value is added to an ACL
163 * priority to determine the ACL's logical flow priority. */
164 #define OVN_ACL_PRI_OFFSET 1000
166 /* Register definitions specific to switches. */
167 #define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
168 #define REGBIT_CONNTRACK_COMMIT "reg0[1]"
169 #define REGBIT_CONNTRACK_NAT "reg0[2]"
170 #define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
171 #define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
172 #define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"
174 /* Register definitions for switches and routers. */
175 #define REGBIT_NAT_REDIRECT "reg9[0]"
176 /* Indicate that this packet has been recirculated using egress
177 * loopback. This allows certain checks to be bypassed, such as a
178 * logical router dropping packets with source IP address equals
179 * one of the logical router's own IP addresses. */
180 #define REGBIT_EGRESS_LOOPBACK "reg9[1]"
182 /* Returns an "enum ovn_stage" built from the arguments. */
183 static enum ovn_stage
184 ovn_stage_build(enum ovn_datapath_type dp_type
, enum ovn_pipeline pipeline
,
187 return OVN_STAGE_BUILD(dp_type
, pipeline
, table
);
190 /* Returns the pipeline to which 'stage' belongs. */
191 static enum ovn_pipeline
192 ovn_stage_get_pipeline(enum ovn_stage stage
)
194 return (stage
>> 8) & 1;
197 /* Returns the pipeline name to which 'stage' belongs. */
199 ovn_stage_get_pipeline_name(enum ovn_stage stage
)
201 return ovn_stage_get_pipeline(stage
) == P_IN
? "ingress" : "egress";
204 /* Returns the table to which 'stage' belongs. */
206 ovn_stage_get_table(enum ovn_stage stage
)
211 /* Returns a string name for 'stage'. */
213 ovn_stage_to_str(enum ovn_stage stage
)
216 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
217 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
219 #undef PIPELINE_STAGE
220 default: return "<unknown>";
224 /* Returns the type of the datapath to which a flow with the given 'stage' may
226 static enum ovn_datapath_type
227 ovn_stage_to_datapath_type(enum ovn_stage stage
)
230 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
231 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
233 #undef PIPELINE_STAGE
234 default: OVS_NOT_REACHED();
242 %s: OVN northbound management daemon\n\
243 usage: %s [OPTIONS]\n\
246 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
248 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
250 --unixctl=SOCKET override default control socket name\n\
251 -h, --help display this help message\n\
252 -o, --options list available options\n\
253 -V, --version display version information\n\
254 ", program_name
, program_name
, default_nb_db(), default_sb_db());
257 stream_usage("database", true, true, false);
261 struct hmap_node hmap_node
;
266 destroy_tnlids(struct hmap
*tnlids
)
268 struct tnlid_node
*node
;
269 HMAP_FOR_EACH_POP (node
, hmap_node
, tnlids
) {
272 hmap_destroy(tnlids
);
276 add_tnlid(struct hmap
*set
, uint32_t tnlid
)
278 struct tnlid_node
*node
= xmalloc(sizeof *node
);
279 hmap_insert(set
, &node
->hmap_node
, hash_int(tnlid
, 0));
284 tnlid_in_use(const struct hmap
*set
, uint32_t tnlid
)
286 const struct tnlid_node
*node
;
287 HMAP_FOR_EACH_IN_BUCKET (node
, hmap_node
, hash_int(tnlid
, 0), set
) {
288 if (node
->tnlid
== tnlid
) {
/* Returns the tunnel ID that follows 'tnlid', wrapping around to 1 (not 0,
 * which is reserved) after 'max'. */
static uint32_t
next_tnlid(uint32_t tnlid, uint32_t max)
{
    uint32_t candidate = tnlid + 1;
    return candidate <= max ? candidate : 1;
}
302 allocate_tnlid(struct hmap
*set
, const char *name
, uint32_t max
,
305 for (uint32_t tnlid
= next_tnlid(*hint
, max
); tnlid
!= *hint
;
306 tnlid
= next_tnlid(tnlid
, max
)) {
307 if (!tnlid_in_use(set
, tnlid
)) {
308 add_tnlid(set
, tnlid
);
314 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
315 VLOG_WARN_RL(&rl
, "all %s tunnel ids exhausted", name
);
319 struct ovn_chassis_qdisc_queues
{
320 struct hmap_node key_node
;
322 struct uuid chassis_uuid
;
326 destroy_chassis_queues(struct hmap
*set
)
328 struct ovn_chassis_qdisc_queues
*node
;
329 HMAP_FOR_EACH_POP (node
, key_node
, set
) {
336 add_chassis_queue(struct hmap
*set
, struct uuid
*chassis_uuid
,
339 struct ovn_chassis_qdisc_queues
*node
= xmalloc(sizeof *node
);
340 node
->queue_id
= queue_id
;
341 memcpy(&node
->chassis_uuid
, chassis_uuid
, sizeof node
->chassis_uuid
);
342 hmap_insert(set
, &node
->key_node
, uuid_hash(chassis_uuid
));
346 chassis_queueid_in_use(const struct hmap
*set
, struct uuid
*chassis_uuid
,
349 const struct ovn_chassis_qdisc_queues
*node
;
350 HMAP_FOR_EACH_WITH_HASH (node
, key_node
, uuid_hash(chassis_uuid
), set
) {
351 if (uuid_equals(chassis_uuid
, &node
->chassis_uuid
)
352 && node
->queue_id
== queue_id
) {
360 allocate_chassis_queueid(struct hmap
*set
, struct sbrec_chassis
*chassis
)
362 for (uint32_t queue_id
= QDISC_MIN_QUEUE_ID
+ 1;
363 queue_id
<= QDISC_MAX_QUEUE_ID
;
365 if (!chassis_queueid_in_use(set
, &chassis
->header_
.uuid
, queue_id
)) {
366 add_chassis_queue(set
, &chassis
->header_
.uuid
, queue_id
);
371 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
372 VLOG_WARN_RL(&rl
, "all %s queue ids exhausted", chassis
->name
);
377 free_chassis_queueid(struct hmap
*set
, struct sbrec_chassis
*chassis
,
380 struct ovn_chassis_qdisc_queues
*node
;
381 HMAP_FOR_EACH_WITH_HASH (node
, key_node
,
382 uuid_hash(&chassis
->header_
.uuid
),
384 if (uuid_equals(&chassis
->header_
.uuid
, &node
->chassis_uuid
)
385 && node
->queue_id
== queue_id
) {
386 hmap_remove(set
, &node
->key_node
);
/* Returns true if 'opts' (a port's options smap) configures either of the
 * QoS rate-limiting keys "qos_max_rate" or "qos_burst". */
static bool
port_has_qos_params(const struct smap *opts)
{
    return smap_get(opts, "qos_max_rate") != NULL
           || smap_get(opts, "qos_burst") != NULL;
}
403 unsigned long *allocated_ipv4s
; /* A bitmap of allocated IPv4s */
404 bool ipv6_prefix_set
;
405 struct in6_addr ipv6_prefix
;
408 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
409 * sb->external_ids:logical-switch. */
410 struct ovn_datapath
{
411 struct hmap_node key_node
; /* Index on 'key'. */
412 struct uuid key
; /* (nbs/nbr)->header_.uuid. */
414 const struct nbrec_logical_switch
*nbs
; /* May be NULL. */
415 const struct nbrec_logical_router
*nbr
; /* May be NULL. */
416 const struct sbrec_datapath_binding
*sb
; /* May be NULL. */
418 struct ovs_list list
; /* In list of similar records. */
420 /* Logical switch data. */
421 struct ovn_port
**router_ports
;
422 size_t n_router_ports
;
424 struct hmap port_tnlids
;
425 uint32_t port_key_hint
;
430 struct ipam_info ipam_info
;
432 /* OVN northd only needs to know about the logical router gateway port for
433 * NAT on a distributed router. This "distributed gateway port" is
434 * populated only when there is a "redirect-chassis" specified for one of
435 * the ports on the logical router. Otherwise this will be NULL. */
436 struct ovn_port
*l3dgw_port
;
437 /* The "derived" OVN port representing the instance of l3dgw_port on
438 * the "redirect-chassis". */
439 struct ovn_port
*l3redirect_port
;
440 struct ovn_port
*localnet_port
;
444 struct hmap_node hmap_node
;
445 struct eth_addr mac_addr
; /* Allocated MAC address. */
449 cleanup_macam(struct hmap
*macam_
)
451 struct macam_node
*node
;
452 HMAP_FOR_EACH_POP (node
, hmap_node
, macam_
) {
457 static struct ovn_datapath
*
458 ovn_datapath_create(struct hmap
*datapaths
, const struct uuid
*key
,
459 const struct nbrec_logical_switch
*nbs
,
460 const struct nbrec_logical_router
*nbr
,
461 const struct sbrec_datapath_binding
*sb
)
463 struct ovn_datapath
*od
= xzalloc(sizeof *od
);
468 hmap_init(&od
->port_tnlids
);
469 od
->port_key_hint
= 0;
470 hmap_insert(datapaths
, &od
->key_node
, uuid_hash(&od
->key
));
475 ovn_datapath_destroy(struct hmap
*datapaths
, struct ovn_datapath
*od
)
478 /* Don't remove od->list. It is used within build_datapaths() as a
479 * private list and once we've exited that function it is not safe to
481 hmap_remove(datapaths
, &od
->key_node
);
482 destroy_tnlids(&od
->port_tnlids
);
483 bitmap_free(od
->ipam_info
.allocated_ipv4s
);
484 free(od
->router_ports
);
489 /* Returns 'od''s datapath type. */
490 static enum ovn_datapath_type
491 ovn_datapath_get_type(const struct ovn_datapath
*od
)
493 return od
->nbs
? DP_SWITCH
: DP_ROUTER
;
496 static struct ovn_datapath
*
497 ovn_datapath_find(struct hmap
*datapaths
, const struct uuid
*uuid
)
499 struct ovn_datapath
*od
;
501 HMAP_FOR_EACH_WITH_HASH (od
, key_node
, uuid_hash(uuid
), datapaths
) {
502 if (uuid_equals(uuid
, &od
->key
)) {
509 static struct ovn_datapath
*
510 ovn_datapath_from_sbrec(struct hmap
*datapaths
,
511 const struct sbrec_datapath_binding
*sb
)
515 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
516 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
519 return ovn_datapath_find(datapaths
, &key
);
523 lrouter_is_enabled(const struct nbrec_logical_router
*lrouter
)
525 return !lrouter
->enabled
|| *lrouter
->enabled
;
529 init_ipam_info_for_datapath(struct ovn_datapath
*od
)
535 const char *subnet_str
= smap_get(&od
->nbs
->other_config
, "subnet");
536 const char *ipv6_prefix
= smap_get(&od
->nbs
->other_config
, "ipv6_prefix");
539 od
->ipam_info
.ipv6_prefix_set
= ipv6_parse(
540 ipv6_prefix
, &od
->ipam_info
.ipv6_prefix
);
547 ovs_be32 subnet
, mask
;
548 char *error
= ip_parse_masked(subnet_str
, &subnet
, &mask
);
549 if (error
|| mask
== OVS_BE32_MAX
|| !ip_is_cidr(mask
)) {
550 static struct vlog_rate_limit rl
551 = VLOG_RATE_LIMIT_INIT(5, 1);
552 VLOG_WARN_RL(&rl
, "bad 'subnet' %s", subnet_str
);
557 od
->ipam_info
.start_ipv4
= ntohl(subnet
) + 1;
558 od
->ipam_info
.total_ipv4s
= ~ntohl(mask
);
559 od
->ipam_info
.allocated_ipv4s
=
560 bitmap_allocate(od
->ipam_info
.total_ipv4s
);
562 /* Mark first IP as taken */
563 bitmap_set1(od
->ipam_info
.allocated_ipv4s
, 0);
565 /* Check if there are any reserved IPs (list) to be excluded from IPAM */
566 const char *exclude_ip_list
= smap_get(&od
->nbs
->other_config
,
568 if (!exclude_ip_list
) {
573 lexer_init(&lexer
, exclude_ip_list
);
574 /* exclude_ip_list could be in the format -
575 * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
578 while (lexer
.token
.type
!= LEX_T_END
) {
579 if (lexer
.token
.type
!= LEX_T_INTEGER
) {
580 lexer_syntax_error(&lexer
, "expecting address");
583 uint32_t start
= ntohl(lexer
.token
.value
.ipv4
);
586 uint32_t end
= start
+ 1;
587 if (lexer_match(&lexer
, LEX_T_ELLIPSIS
)) {
588 if (lexer
.token
.type
!= LEX_T_INTEGER
) {
589 lexer_syntax_error(&lexer
, "expecting address range");
592 end
= ntohl(lexer
.token
.value
.ipv4
) + 1;
596 /* Clamp start...end to fit the subnet. */
597 start
= MAX(od
->ipam_info
.start_ipv4
, start
);
598 end
= MIN(od
->ipam_info
.start_ipv4
+ od
->ipam_info
.total_ipv4s
, end
);
600 bitmap_set_multiple(od
->ipam_info
.allocated_ipv4s
,
601 start
- od
->ipam_info
.start_ipv4
,
604 lexer_error(&lexer
, "excluded addresses not in subnet");
608 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
609 VLOG_WARN_RL(&rl
, "logical switch "UUID_FMT
": bad exclude_ips (%s)",
610 UUID_ARGS(&od
->key
), lexer
.error
);
612 lexer_destroy(&lexer
);
616 ovn_datapath_update_external_ids(struct ovn_datapath
*od
)
618 /* Get the logical-switch or logical-router UUID to set in
620 char uuid_s
[UUID_LEN
+ 1];
621 sprintf(uuid_s
, UUID_FMT
, UUID_ARGS(&od
->key
));
622 const char *key
= od
->nbs
? "logical-switch" : "logical-router";
624 /* Get names to set in external-ids. */
625 const char *name
= od
->nbs
? od
->nbs
->name
: od
->nbr
->name
;
626 const char *name2
= (od
->nbs
627 ? smap_get(&od
->nbs
->external_ids
,
628 "neutron:network_name")
629 : smap_get(&od
->nbr
->external_ids
,
630 "neutron:router_name"));
632 /* Set external-ids. */
633 struct smap ids
= SMAP_INITIALIZER(&ids
);
634 smap_add(&ids
, key
, uuid_s
);
635 smap_add(&ids
, "name", name
);
636 if (name2
&& name2
[0]) {
637 smap_add(&ids
, "name2", name2
);
639 sbrec_datapath_binding_set_external_ids(od
->sb
, &ids
);
644 join_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
,
645 struct ovs_list
*sb_only
, struct ovs_list
*nb_only
,
646 struct ovs_list
*both
)
648 hmap_init(datapaths
);
649 ovs_list_init(sb_only
);
650 ovs_list_init(nb_only
);
653 const struct sbrec_datapath_binding
*sb
, *sb_next
;
654 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb
, sb_next
, ctx
->ovnsb_idl
) {
656 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
657 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
658 ovsdb_idl_txn_add_comment(
660 "deleting Datapath_Binding "UUID_FMT
" that lacks "
661 "external-ids:logical-switch and "
662 "external-ids:logical-router",
663 UUID_ARGS(&sb
->header_
.uuid
));
664 sbrec_datapath_binding_delete(sb
);
668 if (ovn_datapath_find(datapaths
, &key
)) {
669 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
671 &rl
, "deleting Datapath_Binding "UUID_FMT
" with "
672 "duplicate external-ids:logical-switch/router "UUID_FMT
,
673 UUID_ARGS(&sb
->header_
.uuid
), UUID_ARGS(&key
));
674 sbrec_datapath_binding_delete(sb
);
678 struct ovn_datapath
*od
= ovn_datapath_create(datapaths
, &key
,
680 ovs_list_push_back(sb_only
, &od
->list
);
683 const struct nbrec_logical_switch
*nbs
;
684 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs
, ctx
->ovnnb_idl
) {
685 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
689 ovs_list_remove(&od
->list
);
690 ovs_list_push_back(both
, &od
->list
);
691 ovn_datapath_update_external_ids(od
);
693 od
= ovn_datapath_create(datapaths
, &nbs
->header_
.uuid
,
695 ovs_list_push_back(nb_only
, &od
->list
);
698 init_ipam_info_for_datapath(od
);
701 const struct nbrec_logical_router
*nbr
;
702 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr
, ctx
->ovnnb_idl
) {
703 if (!lrouter_is_enabled(nbr
)) {
707 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
712 ovs_list_remove(&od
->list
);
713 ovs_list_push_back(both
, &od
->list
);
714 ovn_datapath_update_external_ids(od
);
717 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
719 "duplicate UUID "UUID_FMT
" in OVN_Northbound",
720 UUID_ARGS(&nbr
->header_
.uuid
));
724 od
= ovn_datapath_create(datapaths
, &nbr
->header_
.uuid
,
726 ovs_list_push_back(nb_only
, &od
->list
);
/* Allocates a tunnel key for a datapath from the 24-bit key space, using a
 * persistent hint to spread allocations across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;

    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
738 /* Updates the southbound Datapath_Binding table so that it contains the
739 * logical switches and routers specified by the northbound database.
741 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
742 * switch and router. */
744 build_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
)
746 struct ovs_list sb_only
, nb_only
, both
;
748 join_datapaths(ctx
, datapaths
, &sb_only
, &nb_only
, &both
);
750 if (!ovs_list_is_empty(&nb_only
)) {
751 /* First index the in-use datapath tunnel IDs. */
752 struct hmap dp_tnlids
= HMAP_INITIALIZER(&dp_tnlids
);
753 struct ovn_datapath
*od
;
754 LIST_FOR_EACH (od
, list
, &both
) {
755 add_tnlid(&dp_tnlids
, od
->sb
->tunnel_key
);
758 /* Add southbound record for each unmatched northbound record. */
759 LIST_FOR_EACH (od
, list
, &nb_only
) {
760 uint16_t tunnel_key
= ovn_datapath_allocate_key(&dp_tnlids
);
765 od
->sb
= sbrec_datapath_binding_insert(ctx
->ovnsb_txn
);
766 ovn_datapath_update_external_ids(od
);
767 sbrec_datapath_binding_set_tunnel_key(od
->sb
, tunnel_key
);
769 destroy_tnlids(&dp_tnlids
);
772 /* Delete southbound records without northbound matches. */
773 struct ovn_datapath
*od
, *next
;
774 LIST_FOR_EACH_SAFE (od
, next
, list
, &sb_only
) {
775 ovs_list_remove(&od
->list
);
776 sbrec_datapath_binding_delete(od
->sb
);
777 ovn_datapath_destroy(datapaths
, od
);
782 struct hmap_node key_node
; /* Index on 'key'. */
783 char *key
; /* nbs->name, nbr->name, sb->logical_port. */
784 char *json_key
; /* 'key', quoted for use in JSON. */
786 const struct sbrec_port_binding
*sb
; /* May be NULL. */
788 /* Logical switch port data. */
789 const struct nbrec_logical_switch_port
*nbsp
; /* May be NULL. */
791 struct lport_addresses
*lsp_addrs
; /* Logical switch port addresses. */
792 unsigned int n_lsp_addrs
;
794 struct lport_addresses
*ps_addrs
; /* Port security addresses. */
795 unsigned int n_ps_addrs
;
797 /* Logical router port data. */
798 const struct nbrec_logical_router_port
*nbrp
; /* May be NULL. */
800 struct lport_addresses lrp_networks
;
802 bool derived
; /* Indicates whether this is an additional port
803 * derived from nbsp or nbrp. */
807 * - A switch port S of type "router" has a router port R as a peer,
808 * and R in turn has S as its peer.
810 * - Two connected logical router ports have each other as peer. */
811 struct ovn_port
*peer
;
813 struct ovn_datapath
*od
;
815 struct ovs_list list
; /* In list of similar records. */
818 static struct ovn_port
*
819 ovn_port_create(struct hmap
*ports
, const char *key
,
820 const struct nbrec_logical_switch_port
*nbsp
,
821 const struct nbrec_logical_router_port
*nbrp
,
822 const struct sbrec_port_binding
*sb
)
824 struct ovn_port
*op
= xzalloc(sizeof *op
);
826 struct ds json_key
= DS_EMPTY_INITIALIZER
;
827 json_string_escape(key
, &json_key
);
828 op
->json_key
= ds_steal_cstr(&json_key
);
830 op
->key
= xstrdup(key
);
835 hmap_insert(ports
, &op
->key_node
, hash_string(op
->key
, 0));
840 ovn_port_destroy(struct hmap
*ports
, struct ovn_port
*port
)
843 /* Don't remove port->list. It is used within build_ports() as a
844 * private list and once we've exited that function it is not safe to
846 hmap_remove(ports
, &port
->key_node
);
848 for (int i
= 0; i
< port
->n_lsp_addrs
; i
++) {
849 destroy_lport_addresses(&port
->lsp_addrs
[i
]);
851 free(port
->lsp_addrs
);
853 for (int i
= 0; i
< port
->n_ps_addrs
; i
++) {
854 destroy_lport_addresses(&port
->ps_addrs
[i
]);
856 free(port
->ps_addrs
);
858 destroy_lport_addresses(&port
->lrp_networks
);
859 free(port
->json_key
);
865 static struct ovn_port
*
866 ovn_port_find(struct hmap
*ports
, const char *name
)
870 HMAP_FOR_EACH_WITH_HASH (op
, key_node
, hash_string(name
, 0), ports
) {
871 if (!strcmp(op
->key
, name
)) {
879 ovn_port_allocate_key(struct ovn_datapath
*od
)
881 return allocate_tnlid(&od
->port_tnlids
, "port",
882 (1u << 15) - 1, &od
->port_key_hint
);
/* Composes the name of the chassis-redirect port derived from 'port_name',
 * i.e. "cr-<port_name>".  Returns a string built by xasprintf(); ownership
 * passes to the caller. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
892 ipam_is_duplicate_mac(struct eth_addr
*ea
, uint64_t mac64
, bool warn
)
894 struct macam_node
*macam_node
;
895 HMAP_FOR_EACH_WITH_HASH (macam_node
, hmap_node
, hash_uint64(mac64
),
897 if (eth_addr_equals(*ea
, macam_node
->mac_addr
)) {
899 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
900 VLOG_WARN_RL(&rl
, "Duplicate MAC set: "ETH_ADDR_FMT
,
901 ETH_ADDR_ARGS(macam_node
->mac_addr
));
910 ipam_insert_mac(struct eth_addr
*ea
, bool check
)
916 uint64_t mac64
= eth_addr_to_uint64(*ea
);
917 /* If the new MAC was not assigned by this address management system or
918 * check is true and the new MAC is a duplicate, do not insert it into the
920 if (((mac64
^ MAC_ADDR_PREFIX
) >> 24)
921 || (check
&& ipam_is_duplicate_mac(ea
, mac64
, true))) {
925 struct macam_node
*new_macam_node
= xmalloc(sizeof *new_macam_node
);
926 new_macam_node
->mac_addr
= *ea
;
927 hmap_insert(&macam
, &new_macam_node
->hmap_node
, hash_uint64(mac64
));
931 ipam_insert_ip(struct ovn_datapath
*od
, uint32_t ip
)
933 if (!od
|| !od
->ipam_info
.allocated_ipv4s
) {
937 if (ip
>= od
->ipam_info
.start_ipv4
&&
938 ip
< (od
->ipam_info
.start_ipv4
+ od
->ipam_info
.total_ipv4s
)) {
939 bitmap_set1(od
->ipam_info
.allocated_ipv4s
,
940 ip
- od
->ipam_info
.start_ipv4
);
945 ipam_insert_lsp_addresses(struct ovn_datapath
*od
, struct ovn_port
*op
,
948 if (!od
|| !op
|| !address
|| !strcmp(address
, "unknown")
949 || !strcmp(address
, "router") || is_dynamic_lsp_address(address
)) {
953 struct lport_addresses laddrs
;
954 if (!extract_lsp_addresses(address
, &laddrs
)) {
955 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
956 VLOG_WARN_RL(&rl
, "Extract addresses failed.");
959 ipam_insert_mac(&laddrs
.ea
, true);
961 /* IP is only added to IPAM if the switch's subnet option
962 * is set, whereas MAC is always added to MACAM. */
963 if (!od
->ipam_info
.allocated_ipv4s
) {
964 destroy_lport_addresses(&laddrs
);
968 for (size_t j
= 0; j
< laddrs
.n_ipv4_addrs
; j
++) {
969 uint32_t ip
= ntohl(laddrs
.ipv4_addrs
[j
].addr
);
970 ipam_insert_ip(od
, ip
);
973 destroy_lport_addresses(&laddrs
);
977 ipam_add_port_addresses(struct ovn_datapath
*od
, struct ovn_port
*op
)
984 /* Add all the port's addresses to address data structures. */
985 for (size_t i
= 0; i
< op
->nbsp
->n_addresses
; i
++) {
986 ipam_insert_lsp_addresses(od
, op
, op
->nbsp
->addresses
[i
]);
988 if (op
->nbsp
->dynamic_addresses
) {
989 ipam_insert_lsp_addresses(od
, op
, op
->nbsp
->dynamic_addresses
);
991 } else if (op
->nbrp
) {
992 struct lport_addresses lrp_networks
;
993 if (!extract_lrp_networks(op
->nbrp
, &lrp_networks
)) {
994 static struct vlog_rate_limit rl
995 = VLOG_RATE_LIMIT_INIT(1, 1);
996 VLOG_WARN_RL(&rl
, "Extract addresses failed.");
999 ipam_insert_mac(&lrp_networks
.ea
, true);
1001 if (!op
->peer
|| !op
->peer
->nbsp
|| !op
->peer
->od
|| !op
->peer
->od
->nbs
1002 || !smap_get(&op
->peer
->od
->nbs
->other_config
, "subnet")) {
1003 destroy_lport_addresses(&lrp_networks
);
1007 for (size_t i
= 0; i
< lrp_networks
.n_ipv4_addrs
; i
++) {
1008 uint32_t ip
= ntohl(lrp_networks
.ipv4_addrs
[i
].addr
);
1009 ipam_insert_ip(op
->peer
->od
, ip
);
1012 destroy_lport_addresses(&lrp_networks
);
1017 ipam_get_unused_mac(void)
1019 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1020 static uint32_t last_mac
;
1023 struct eth_addr mac
;
1024 uint32_t mac_addr_suffix
, i
;
1025 for (i
= 0; i
< MAC_ADDR_SPACE
- 1; i
++) {
1026 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1027 mac_addr_suffix
= ((last_mac
+ i
) % (MAC_ADDR_SPACE
- 1)) + 1;
1028 mac64
= MAC_ADDR_PREFIX
| mac_addr_suffix
;
1029 eth_addr_from_uint64(mac64
, &mac
);
1030 if (!ipam_is_duplicate_mac(&mac
, mac64
, false)) {
1031 last_mac
= mac_addr_suffix
;
1036 if (i
== MAC_ADDR_SPACE
) {
1037 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
1038 VLOG_WARN_RL(&rl
, "MAC address space exhausted.");
1046 ipam_get_unused_ip(struct ovn_datapath
*od
)
1048 if (!od
|| !od
->ipam_info
.allocated_ipv4s
) {
1052 size_t new_ip_index
= bitmap_scan(od
->ipam_info
.allocated_ipv4s
, 0, 0,
1053 od
->ipam_info
.total_ipv4s
- 1);
1054 if (new_ip_index
== od
->ipam_info
.total_ipv4s
- 1) {
1055 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
1056 VLOG_WARN_RL( &rl
, "Subnet address space has been exhausted.");
1060 return od
->ipam_info
.start_ipv4
+ new_ip_index
;
1064 ipam_allocate_addresses(struct ovn_datapath
*od
, struct ovn_port
*op
,
1065 const char *addrspec
)
1071 /* Get or generate MAC address. */
1072 struct eth_addr mac
;
1075 if (ovs_scan(addrspec
, ETH_ADDR_SCAN_FMT
" dynamic%n",
1076 ETH_ADDR_SCAN_ARGS(mac
), &n
)
1077 && addrspec
[n
] == '\0') {
1078 dynamic_mac
= false;
1080 uint64_t mac64
= ipam_get_unused_mac();
1084 eth_addr_from_uint64(mac64
, &mac
);
1088 /* Generate IPv4 address, if desirable. */
1089 bool dynamic_ip4
= od
->ipam_info
.allocated_ipv4s
!= NULL
;
1090 uint32_t ip4
= dynamic_ip4
? ipam_get_unused_ip(od
) : 0;
1092 /* Generate IPv6 address, if desirable. */
1093 bool dynamic_ip6
= od
->ipam_info
.ipv6_prefix_set
;
1094 struct in6_addr ip6
;
1096 in6_generate_eui64(mac
, &od
->ipam_info
.ipv6_prefix
, &ip6
);
1099 /* If we didn't generate anything, bail out. */
1100 if (!dynamic_ip4
&& !dynamic_ip6
) {
1104 /* Save the dynamic addresses. */
1105 struct ds new_addr
= DS_EMPTY_INITIALIZER
;
1106 ds_put_format(&new_addr
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(mac
));
1107 if (dynamic_ip4
&& ip4
) {
1108 ipam_insert_ip(od
, ip4
);
1109 ds_put_format(&new_addr
, " "IP_FMT
, IP_ARGS(htonl(ip4
)));
1112 char ip6_s
[INET6_ADDRSTRLEN
+ 1];
1113 ipv6_string_mapped(ip6_s
, &ip6
);
1114 ds_put_format(&new_addr
, " %s", ip6_s
);
1116 ipam_insert_mac(&mac
, !dynamic_mac
);
1117 nbrec_logical_switch_port_set_dynamic_addresses(op
->nbsp
,
1118 ds_cstr(&new_addr
));
1119 ds_destroy(&new_addr
);
1124 build_ipam(struct hmap
*datapaths
, struct hmap
*ports
)
1126 /* IPAM generally stands for IP address management. In non-virtualized
1127 * world, MAC addresses come with the hardware. But, with virtualized
1128 * workloads, they need to be assigned and managed. This function
1129 * does both IP address management (ipam) and MAC address management
1132 /* If the switch's other_config:subnet is set, allocate new addresses for
1133 * ports that have the "dynamic" keyword in their addresses column. */
1134 struct ovn_datapath
*od
;
1135 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1136 if (!od
->nbs
|| (!od
->ipam_info
.allocated_ipv4s
&&
1137 !od
->ipam_info
.ipv6_prefix_set
)) {
1141 struct ovn_port
*op
;
1142 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
1143 const struct nbrec_logical_switch_port
*nbsp
=
1150 op
= ovn_port_find(ports
, nbsp
->name
);
1151 if (!op
|| (op
->nbsp
&& op
->peer
)) {
1152 /* Do not allocate addresses for logical switch ports that
1157 for (size_t j
= 0; j
< nbsp
->n_addresses
; j
++) {
1158 if (is_dynamic_lsp_address(nbsp
->addresses
[j
])
1159 && !nbsp
->dynamic_addresses
) {
1160 if (!ipam_allocate_addresses(od
, op
, nbsp
->addresses
[j
])
1161 || !extract_lsp_addresses(nbsp
->dynamic_addresses
,
1162 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1163 static struct vlog_rate_limit rl
1164 = VLOG_RATE_LIMIT_INIT(1, 1);
1165 VLOG_INFO_RL(&rl
, "Failed to allocate address.");
1173 if (!nbsp
->n_addresses
&& nbsp
->dynamic_addresses
) {
1174 nbrec_logical_switch_port_set_dynamic_addresses(op
->nbsp
,
1181 /* Tag allocation for nested containers.
1183 * For a logical switch port with 'parent_name' and a request to allocate tags,
1184 * keeps a track of all allocated tags. */
1185 struct tag_alloc_node
{
1186 struct hmap_node hmap_node
;
1188 unsigned long *allocated_tags
; /* A bitmap to track allocated tags. */
1192 tag_alloc_destroy(struct hmap
*tag_alloc_table
)
1194 struct tag_alloc_node
*node
;
1195 HMAP_FOR_EACH_POP (node
, hmap_node
, tag_alloc_table
) {
1196 bitmap_free(node
->allocated_tags
);
1197 free(node
->parent_name
);
1200 hmap_destroy(tag_alloc_table
);
1203 static struct tag_alloc_node
*
1204 tag_alloc_get_node(struct hmap
*tag_alloc_table
, const char *parent_name
)
1206 /* If a node for the 'parent_name' exists, return it. */
1207 struct tag_alloc_node
*tag_alloc_node
;
1208 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node
, hmap_node
,
1209 hash_string(parent_name
, 0),
1211 if (!strcmp(tag_alloc_node
->parent_name
, parent_name
)) {
1212 return tag_alloc_node
;
1216 /* Create a new node. */
1217 tag_alloc_node
= xmalloc(sizeof *tag_alloc_node
);
1218 tag_alloc_node
->parent_name
= xstrdup(parent_name
);
1219 tag_alloc_node
->allocated_tags
= bitmap_allocate(MAX_OVN_TAGS
);
1220 /* Tag 0 is invalid for nested containers. */
1221 bitmap_set1(tag_alloc_node
->allocated_tags
, 0);
1222 hmap_insert(tag_alloc_table
, &tag_alloc_node
->hmap_node
,
1223 hash_string(parent_name
, 0));
1225 return tag_alloc_node
;
1229 tag_alloc_add_existing_tags(struct hmap
*tag_alloc_table
,
1230 const struct nbrec_logical_switch_port
*nbsp
)
1232 /* Add the tags of already existing nested containers. If there is no
1233 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1234 if (!nbsp
->parent_name
|| !nbsp
->parent_name
[0] || !nbsp
->tag
) {
1238 struct tag_alloc_node
*tag_alloc_node
;
1239 tag_alloc_node
= tag_alloc_get_node(tag_alloc_table
, nbsp
->parent_name
);
1240 bitmap_set1(tag_alloc_node
->allocated_tags
, *nbsp
->tag
);
1244 tag_alloc_create_new_tag(struct hmap
*tag_alloc_table
,
1245 const struct nbrec_logical_switch_port
*nbsp
)
1247 if (!nbsp
->tag_request
) {
1251 if (nbsp
->parent_name
&& nbsp
->parent_name
[0]
1252 && *nbsp
->tag_request
== 0) {
1253 /* For nested containers that need allocation, do the allocation. */
1256 /* This has already been allocated. */
1260 struct tag_alloc_node
*tag_alloc_node
;
1262 tag_alloc_node
= tag_alloc_get_node(tag_alloc_table
,
1264 tag
= bitmap_scan(tag_alloc_node
->allocated_tags
, 0, 1, MAX_OVN_TAGS
);
1265 if (tag
== MAX_OVN_TAGS
) {
1266 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1267 VLOG_ERR_RL(&rl
, "out of vlans for logical switch ports with "
1268 "parent %s", nbsp
->parent_name
);
1271 bitmap_set1(tag_alloc_node
->allocated_tags
, tag
);
1272 nbrec_logical_switch_port_set_tag(nbsp
, &tag
, 1);
1273 } else if (*nbsp
->tag_request
!= 0) {
1274 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1275 nbrec_logical_switch_port_set_tag(nbsp
, nbsp
->tag_request
, 1);
1281 * This function checks if the MAC in "address" parameter (if present) is
1282 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1286 check_and_update_mac_in_dynamic_addresses(
1287 const char *address
,
1288 const struct nbrec_logical_switch_port
*nbsp
)
1290 if (!nbsp
->dynamic_addresses
) {
1295 if (!ovs_scan_len(address
, &buf_index
,
1296 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(ea
))) {
1300 struct eth_addr present_ea
;
1302 if (ovs_scan_len(nbsp
->dynamic_addresses
, &buf_index
,
1303 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(present_ea
))
1304 && !eth_addr_equals(ea
, present_ea
)) {
1305 /* MAC address has changed. Update it */
1306 char *new_addr
= xasprintf(
1307 ETH_ADDR_FMT
"%s", ETH_ADDR_ARGS(ea
),
1308  
->dynamic_addresses
[buf_index
]);
1309 nbrec_logical_switch_port_set_dynamic_addresses(
1316 join_logical_ports(struct northd_context
*ctx
,
1317 struct hmap
*datapaths
, struct hmap
*ports
,
1318 struct hmap
*chassis_qdisc_queues
,
1319 struct hmap
*tag_alloc_table
, struct ovs_list
*sb_only
,
1320 struct ovs_list
*nb_only
, struct ovs_list
*both
)
1323 ovs_list_init(sb_only
);
1324 ovs_list_init(nb_only
);
1325 ovs_list_init(both
);
1327 const struct sbrec_port_binding
*sb
;
1328 SBREC_PORT_BINDING_FOR_EACH (sb
, ctx
->ovnsb_idl
) {
1329 struct ovn_port
*op
= ovn_port_create(ports
, sb
->logical_port
,
1331 ovs_list_push_back(sb_only
, &op
->list
);
1334 struct ovn_datapath
*od
;
1335 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1337 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
1338 const struct nbrec_logical_switch_port
*nbsp
1339 = od
->nbs
->ports
[i
];
1340 struct ovn_port
*op
= ovn_port_find(ports
, nbsp
->name
);
1342 if (op
->nbsp
|| op
->nbrp
) {
1343 static struct vlog_rate_limit rl
1344 = VLOG_RATE_LIMIT_INIT(5, 1);
1345 VLOG_WARN_RL(&rl
, "duplicate logical port %s",
1350 ovs_list_remove(&op
->list
);
1352 uint32_t queue_id
= smap_get_int(&op
->sb
->options
,
1353 "qdisc_queue_id", 0);
1354 if (queue_id
&& op
->sb
->chassis
) {
1356 chassis_qdisc_queues
, &op
->sb
->chassis
->header_
.uuid
,
1360 ovs_list_push_back(both
, &op
->list
);
1362 /* This port exists due to a SB binding, but should
1363 * not have been initialized fully. */
1364 ovs_assert(!op
->n_lsp_addrs
&& !op
->n_ps_addrs
);
1366 op
= ovn_port_create(ports
, nbsp
->name
, nbsp
, NULL
, NULL
);
1367 ovs_list_push_back(nb_only
, &op
->list
);
1370 if (!strcmp(nbsp
->type
, "localnet")) {
1371 od
->localnet_port
= op
;
1375 = xmalloc(sizeof *op
->lsp_addrs
* nbsp
->n_addresses
);
1376 for (size_t j
= 0; j
< nbsp
->n_addresses
; j
++) {
1377 if (!strcmp(nbsp
->addresses
[j
], "unknown")
1378 || !strcmp(nbsp
->addresses
[j
], "router")) {
1381 if (is_dynamic_lsp_address(nbsp
->addresses
[j
])) {
1382 if (nbsp
->dynamic_addresses
) {
1383 check_and_update_mac_in_dynamic_addresses(
1384 nbsp
->addresses
[j
], nbsp
);
1385 if (!extract_lsp_addresses(nbsp
->dynamic_addresses
,
1386 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1387 static struct vlog_rate_limit rl
1388 = VLOG_RATE_LIMIT_INIT(1, 1);
1389 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in "
1390 "logical switch port "
1391 "dynamic_addresses. No "
1392 "MAC address found",
1393 op
->nbsp
->dynamic_addresses
);
1399 } else if (!extract_lsp_addresses(nbsp
->addresses
[j
],
1400 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1401 static struct vlog_rate_limit rl
1402 = VLOG_RATE_LIMIT_INIT(1, 1);
1403 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in logical "
1404 "switch port addresses. No MAC "
1406 op
->nbsp
->addresses
[j
]);
1413 = xmalloc(sizeof *op
->ps_addrs
* nbsp
->n_port_security
);
1414 for (size_t j
= 0; j
< nbsp
->n_port_security
; j
++) {
1415 if (!extract_lsp_addresses(nbsp
->port_security
[j
],
1416 &op
->ps_addrs
[op
->n_ps_addrs
])) {
1417 static struct vlog_rate_limit rl
1418 = VLOG_RATE_LIMIT_INIT(1, 1);
1419 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in port "
1420 "security. No MAC address found",
1421 op
->nbsp
->port_security
[j
]);
1428 ipam_add_port_addresses(od
, op
);
1429 tag_alloc_add_existing_tags(tag_alloc_table
, nbsp
);
1432 for (size_t i
= 0; i
< od
->nbr
->n_ports
; i
++) {
1433 const struct nbrec_logical_router_port
*nbrp
1434 = od
->nbr
->ports
[i
];
1436 struct lport_addresses lrp_networks
;
1437 if (!extract_lrp_networks(nbrp
, &lrp_networks
)) {
1438 static struct vlog_rate_limit rl
1439 = VLOG_RATE_LIMIT_INIT(5, 1);
1440 VLOG_WARN_RL(&rl
, "bad 'mac' %s", nbrp
->mac
);
1444 if (!lrp_networks
.n_ipv4_addrs
&& !lrp_networks
.n_ipv6_addrs
) {
1448 struct ovn_port
*op
= ovn_port_find(ports
, nbrp
->name
);
1450 if (op
->nbsp
|| op
->nbrp
) {
1451 static struct vlog_rate_limit rl
1452 = VLOG_RATE_LIMIT_INIT(5, 1);
1453 VLOG_WARN_RL(&rl
, "duplicate logical router port %s",
1458 ovs_list_remove(&op
->list
);
1459 ovs_list_push_back(both
, &op
->list
);
1461 /* This port exists but should not have been
1462 * initialized fully. */
1463 ovs_assert(!op
->lrp_networks
.n_ipv4_addrs
1464 && !op
->lrp_networks
.n_ipv6_addrs
);
1466 op
= ovn_port_create(ports
, nbrp
->name
, NULL
, nbrp
, NULL
);
1467 ovs_list_push_back(nb_only
, &op
->list
);
1470 op
->lrp_networks
= lrp_networks
;
1472 ipam_add_port_addresses(op
->od
, op
);
1474 const char *redirect_chassis
= smap_get(&op
->nbrp
->options
,
1475 "redirect-chassis");
1476 if (redirect_chassis
|| op
->nbrp
->n_gateway_chassis
) {
1477 /* Additional "derived" ovn_port crp represents the
1478 * instance of op on the "redirect-chassis". */
1479 const char *gw_chassis
= smap_get(&op
->od
->nbr
->options
,
1482 static struct vlog_rate_limit rl
1483 = VLOG_RATE_LIMIT_INIT(1, 1);
1484 VLOG_WARN_RL(&rl
, "Bad configuration: "
1485 "redirect-chassis configured on port %s "
1486 "on L3 gateway router", nbrp
->name
);
1489 if (od
->l3dgw_port
|| od
->l3redirect_port
) {
1490 static struct vlog_rate_limit rl
1491 = VLOG_RATE_LIMIT_INIT(1, 1);
1492 VLOG_WARN_RL(&rl
, "Bad configuration: multiple ports "
1493 "with redirect-chassis on same logical "
1494 "router %s", od
->nbr
->name
);
1498 char *redirect_name
= chassis_redirect_name(nbrp
->name
);
1499 struct ovn_port
*crp
= ovn_port_find(ports
, redirect_name
);
1501 crp
->derived
= true;
1503 ovs_list_remove(&crp
->list
);
1504 ovs_list_push_back(both
, &crp
->list
);
1506 crp
= ovn_port_create(ports
, redirect_name
,
1508 crp
->derived
= true;
1509 ovs_list_push_back(nb_only
, &crp
->list
);
1512 free(redirect_name
);
1514 /* Set l3dgw_port and l3redirect_port in od, for later
1515 * use during flow creation. */
1516 od
->l3dgw_port
= op
;
1517 od
->l3redirect_port
= crp
;
1523 /* Connect logical router ports, and logical switch ports of type "router",
1524 * to their peers. */
1525 struct ovn_port
*op
;
1526 HMAP_FOR_EACH (op
, key_node
, ports
) {
1527 if (op
->nbsp
&& !strcmp(op
->nbsp
->type
, "router") && !op
->derived
) {
1528 const char *peer_name
= smap_get(&op
->nbsp
->options
, "router-port");
1533 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
1534 if (!peer
|| !peer
->nbrp
) {
1540 op
->od
->router_ports
= xrealloc(
1541 op
->od
->router_ports
,
1542 sizeof *op
->od
->router_ports
* (op
->od
->n_router_ports
+ 1));
1543 op
->od
->router_ports
[op
->od
->n_router_ports
++] = op
;
1545 /* Fill op->lsp_addrs for op->nbsp->addresses[] with
1546 * contents "router", which was skipped in the loop above. */
1547 for (size_t j
= 0; j
< op
->nbsp
->n_addresses
; j
++) {
1548 if (!strcmp(op
->nbsp
->addresses
[j
], "router")) {
1549 if (extract_lrp_networks(peer
->nbrp
,
1550 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1556 } else if (op
->nbrp
&& op
->nbrp
->peer
&& !op
->derived
) {
1557 struct ovn_port
*peer
= ovn_port_find(ports
, op
->nbrp
->peer
);
1561 } else if (peer
->nbsp
) {
1562 /* An ovn_port for a switch port of type "router" does have
1563 * a router port as its peer (see the case above for
1564 * "router" ports), but this is set via options:router-port
1565 * in Logical_Switch_Port and does not involve the
1566 * Logical_Router_Port's 'peer' column. */
1567 static struct vlog_rate_limit rl
=
1568 VLOG_RATE_LIMIT_INIT(5, 1);
1569 VLOG_WARN_RL(&rl
, "Bad configuration: The peer of router "
1570 "port %s is a switch port", op
->key
);
1578 ip_address_and_port_from_lb_key(const char *key
, char **ip_address
,
1579 uint16_t *port
, int *addr_family
);
1582 get_router_load_balancer_ips(const struct ovn_datapath
*od
,
1583 struct sset
*all_ips
, int *addr_family
)
1589 for (int i
= 0; i
< od
->nbr
->n_load_balancer
; i
++) {
1590 struct nbrec_load_balancer
*lb
= od
->nbr
->load_balancer
[i
];
1591 struct smap
*vips
= &lb
->vips
;
1592 struct smap_node
*node
;
1594 SMAP_FOR_EACH (node
, vips
) {
1595 /* node->key contains IP:port or just IP. */
1596 char *ip_address
= NULL
;
1599 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
1605 if (!sset_contains(all_ips
, ip_address
)) {
1606 sset_add(all_ips
, ip_address
);
1614 /* Returns an array of strings, each consisting of a MAC address followed
1615 * by one or more IP addresses, and if the port is a distributed gateway
1616 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1617 * LPORT_NAME is the name of the L3 redirect port or the name of the
1618 * logical_port specified in a NAT rule. These strings include the
1619 * external IP addresses of all NAT rules defined on that router, and all
1620 * of the IP addresses used in load balancer VIPs defined on that router.
1622 * The caller must free each of the n returned strings with free(),
1623 * and must free the returned array when it is no longer needed. */
1625 get_nat_addresses(const struct ovn_port
*op
, size_t *n
)
1628 struct eth_addr mac
;
1629 if (!op
->nbrp
|| !op
->od
|| !op
->od
->nbr
1630 || (!op
->od
->nbr
->n_nat
&& !op
->od
->nbr
->n_load_balancer
)
1631 || !eth_addr_from_string(op
->nbrp
->mac
, &mac
)) {
1636 struct ds c_addresses
= DS_EMPTY_INITIALIZER
;
1637 ds_put_format(&c_addresses
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(mac
));
1638 bool central_ip_address
= false;
1641 addresses
= xmalloc(sizeof *addresses
* (op
->od
->nbr
->n_nat
+ 1));
1643 /* Get NAT IP addresses. */
1644 for (size_t i
= 0; i
< op
->od
->nbr
->n_nat
; i
++) {
1645 const struct nbrec_nat
*nat
= op
->od
->nbr
->nat
[i
];
1648 char *error
= ip_parse_masked(nat
->external_ip
, &ip
, &mask
);
1649 if (error
|| mask
!= OVS_BE32_MAX
) {
1654 /* Determine whether this NAT rule satisfies the conditions for
1655 * distributed NAT processing. */
1656 if (op
->od
->l3redirect_port
&& !strcmp(nat
->type
, "dnat_and_snat")
1657 && nat
->logical_port
&& nat
->external_mac
) {
1658 /* Distributed NAT rule. */
1659 if (eth_addr_from_string(nat
->external_mac
, &mac
)) {
1660 struct ds address
= DS_EMPTY_INITIALIZER
;
1661 ds_put_format(&address
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(mac
));
1662 ds_put_format(&address
, " %s", nat
->external_ip
);
1663 ds_put_format(&address
, " is_chassis_resident(\"%s\")",
1665 addresses
[n_nats
++] = ds_steal_cstr(&address
);
1668 /* Centralized NAT rule, either on gateway router or distributed
1670 ds_put_format(&c_addresses
, " %s", nat
->external_ip
);
1671 central_ip_address
= true;
1675 /* A set to hold all load-balancer vips. */
1676 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
1678 get_router_load_balancer_ips(op
->od
, &all_ips
, &addr_family
);
1680 const char *ip_address
;
1681 SSET_FOR_EACH (ip_address
, &all_ips
) {
1682 ds_put_format(&c_addresses
, " %s", ip_address
);
1683 central_ip_address
= true;
1685 sset_destroy(&all_ips
);
1687 if (central_ip_address
) {
1688 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1689 * ports should be restricted to the "redirect-chassis". */
1690 if (op
->od
->l3redirect_port
) {
1691 ds_put_format(&c_addresses
, " is_chassis_resident(%s)",
1692 op
->od
->l3redirect_port
->json_key
);
1695 addresses
[n_nats
++] = ds_steal_cstr(&c_addresses
);
1704 gateway_chassis_equal(const struct nbrec_gateway_chassis
*nb_gwc
,
1705 const struct sbrec_chassis
*nb_gwc_c
,
1706 const struct sbrec_gateway_chassis
*sb_gwc
)
1708 bool equal
= !strcmp(nb_gwc
->name
, sb_gwc
->name
)
1709 && nb_gwc
->priority
== sb_gwc
->priority
1710 && smap_equal(&nb_gwc
->options
, &sb_gwc
->options
)
1711 && smap_equal(&nb_gwc
->external_ids
, &sb_gwc
->external_ids
);
1717 /* If everything else matched and we were unable to find the SBDB
1718 * Chassis entry at this time, assume a match and return true.
1719 * This happens when an ovn-controller is restarting and the Chassis
1720 * entry is gone away momentarily */
1722 || (sb_gwc
->chassis
&& !strcmp(nb_gwc_c
->name
,
1723 sb_gwc
->chassis
->name
));
1727 sbpb_gw_chassis_needs_update(
1728 struct ovsdb_idl_index
*sbrec_chassis_by_name
,
1729 const struct sbrec_port_binding
*port_binding
,
1730 const struct nbrec_logical_router_port
*lrp
)
1732 if (!lrp
|| !port_binding
) {
1736 /* These arrays are used to collect valid Gateway_Chassis and valid
1737 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1738 * we ignore the ones we can't match on the SBDB */
1739 struct nbrec_gateway_chassis
**lrp_gwc
= xzalloc(lrp
->n_gateway_chassis
*
1741 const struct sbrec_chassis
**lrp_gwc_c
= xzalloc(lrp
->n_gateway_chassis
*
1744 /* Count the number of gateway chassis chassis names from the logical
1745 * router port that we are able to match on the southbound database */
1746 int lrp_n_gateway_chassis
= 0;
1748 for (n
= 0; n
< lrp
->n_gateway_chassis
; n
++) {
1750 if (!lrp
->gateway_chassis
[n
]->chassis_name
) {
1754 const struct sbrec_chassis
*chassis
=
1755 chassis_lookup_by_name(sbrec_chassis_by_name
,
1756 lrp
->gateway_chassis
[n
]->chassis_name
);
1758 lrp_gwc_c
[lrp_n_gateway_chassis
] = chassis
;
1759 lrp_gwc
[lrp_n_gateway_chassis
] = lrp
->gateway_chassis
[n
];
1760 lrp_n_gateway_chassis
++;
1762 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1764 &rl
, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1765 "on logical router port %s does not exist in SBDB",
1766 lrp
->gateway_chassis
[n
]->chassis_name
, lrp
->name
);
1770 /* Basic check, different amount of Gateway_Chassis means that we
1771 * need to update southbound database Port_Binding */
1772 if (lrp_n_gateway_chassis
!= port_binding
->n_gateway_chassis
) {
1778 for (n
= 0; n
< lrp_n_gateway_chassis
; n
++) {
1780 /* For each of the valid gw chassis on the lrp, check if there's
1781 * a match on the Port_Binding list, we assume order is not
1783 for (i
= 0; i
< port_binding
->n_gateway_chassis
; i
++) {
1784 if (gateway_chassis_equal(lrp_gwc
[n
],
1786 port_binding
->gateway_chassis
[i
])) {
1787 break; /* we found a match */
1791 /* if no Port_Binding gateway chassis matched for the entry... */
1792 if (i
== port_binding
->n_gateway_chassis
) {
1795 return true; /* found no match for this gateway chassis on lrp */
1799 /* no need for update, all ports matched */
1805 /* This functions translates the gw chassis on the nb database
1806 * to sb database entries, the only difference is that SB database
1807 * Gateway_Chassis table references the chassis directly instead
1808 * of using the name */
1810 copy_gw_chassis_from_nbrp_to_sbpb(
1811 struct northd_context
*ctx
,
1812 struct ovsdb_idl_index
*sbrec_chassis_by_name
,
1813 const struct nbrec_logical_router_port
*lrp
,
1814 const struct sbrec_port_binding
*port_binding
) {
1816 if (!lrp
|| !port_binding
|| !lrp
->n_gateway_chassis
) {
1820 struct sbrec_gateway_chassis
**gw_chassis
= NULL
;
1824 /* XXX: This can be improved. This code will generate a set of new
1825 * Gateway_Chassis and push them all in a single transaction, instead
1826 * this would be more optimal if we just add/update/remove the rows in
1827 * the southbound db that need to change. We don't expect lots of
1828 * changes to the Gateway_Chassis table, but if that proves to be wrong
1829 * we should optimize this. */
1830 for (n
= 0; n
< lrp
->n_gateway_chassis
; n
++) {
1831 struct nbrec_gateway_chassis
*lrp_gwc
= lrp
->gateway_chassis
[n
];
1832 if (!lrp_gwc
->chassis_name
) {
1836 const struct sbrec_chassis
*chassis
=
1837 chassis_lookup_by_name(sbrec_chassis_by_name
,
1838 lrp_gwc
->chassis_name
);
1840 gw_chassis
= xrealloc(gw_chassis
, (n_gwc
+ 1) * sizeof *gw_chassis
);
1842 struct sbrec_gateway_chassis
*pb_gwc
=
1843 sbrec_gateway_chassis_insert(ctx
->ovnsb_txn
);
1845 sbrec_gateway_chassis_set_name(pb_gwc
, lrp_gwc
->name
);
1846 sbrec_gateway_chassis_set_priority(pb_gwc
, lrp_gwc
->priority
);
1847 sbrec_gateway_chassis_set_chassis(pb_gwc
, chassis
);
1848 sbrec_gateway_chassis_set_options(pb_gwc
, &lrp_gwc
->options
);
1849 sbrec_gateway_chassis_set_external_ids(pb_gwc
, &lrp_gwc
->external_ids
);
1851 gw_chassis
[n_gwc
++] = pb_gwc
;
1853 sbrec_port_binding_set_gateway_chassis(port_binding
, gw_chassis
, n_gwc
);
1858 ovn_port_update_sbrec(struct northd_context
*ctx
,
1859 struct ovsdb_idl_index
*sbrec_chassis_by_name
,
1860 const struct ovn_port
*op
,
1861 struct hmap
*chassis_qdisc_queues
)
1863 sbrec_port_binding_set_datapath(op
->sb
, op
->od
->sb
);
1865 /* If the router is for l3 gateway, it resides on a chassis
1866 * and its port type is "l3gateway". */
1867 const char *chassis_name
= smap_get(&op
->od
->nbr
->options
, "chassis");
1869 sbrec_port_binding_set_type(op
->sb
, "chassisredirect");
1870 } else if (chassis_name
) {
1871 sbrec_port_binding_set_type(op
->sb
, "l3gateway");
1873 sbrec_port_binding_set_type(op
->sb
, "patch");
1879 const char *redirect_chassis
= smap_get(&op
->nbrp
->options
,
1880 "redirect-chassis");
1881 if (op
->nbrp
->n_gateway_chassis
&& redirect_chassis
) {
1882 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1884 &rl
, "logical router port %s has both options:"
1885 "redirect-chassis and gateway_chassis populated "
1886 "redirect-chassis will be ignored in favour of "
1887 "gateway chassis", op
->nbrp
->name
);
1890 if (op
->nbrp
->n_gateway_chassis
) {
1891 if (sbpb_gw_chassis_needs_update(sbrec_chassis_by_name
,
1892 op
->sb
, op
->nbrp
)) {
1893 copy_gw_chassis_from_nbrp_to_sbpb(ctx
,
1894 sbrec_chassis_by_name
,
1898 } else if (redirect_chassis
) {
1899 /* Handle ports that had redirect-chassis option attached
1900 * to them, and for backwards compatibility convert them
1901 * to a single Gateway_Chassis entry */
1902 const struct sbrec_chassis
*chassis
=
1903 chassis_lookup_by_name(sbrec_chassis_by_name
,
1906 /* If we found the chassis, and the gw chassis on record
1907 * differs from what we expect go ahead and update */
1908 if (op
->sb
->n_gateway_chassis
!= 1
1909 || !op
->sb
->gateway_chassis
[0]->chassis
1910 || strcmp(op
->sb
->gateway_chassis
[0]->chassis
->name
,
1912 || op
->sb
->gateway_chassis
[0]->priority
!= 0) {
1913 /* Construct a single Gateway_Chassis entry on the
1914 * Port_Binding attached to the redirect_chassis
1916 struct sbrec_gateway_chassis
*gw_chassis
=
1917 sbrec_gateway_chassis_insert(ctx
->ovnsb_txn
);
1919 char *gwc_name
= xasprintf("%s_%s", op
->nbrp
->name
,
1922 /* XXX: Again, here, we could just update an existing
1923 * Gateway_Chassis, instead of creating a new one
1924 * and replacing it */
1925 sbrec_gateway_chassis_set_name(gw_chassis
, gwc_name
);
1926 sbrec_gateway_chassis_set_priority(gw_chassis
, 0);
1927 sbrec_gateway_chassis_set_chassis(gw_chassis
, chassis
);
1928 sbrec_gateway_chassis_set_external_ids(gw_chassis
,
1929 &op
->nbrp
->external_ids
);
1930 sbrec_port_binding_set_gateway_chassis(op
->sb
,
1935 VLOG_WARN("chassis name '%s' from redirect from logical "
1936 " router port '%s' redirect-chassis not found",
1937 redirect_chassis
, op
->nbrp
->name
);
1938 if (op
->sb
->n_gateway_chassis
) {
1939 sbrec_port_binding_set_gateway_chassis(op
->sb
, NULL
,
1944 smap_add(&new, "distributed-port", op
->nbrp
->name
);
1947 smap_add(&new, "peer", op
->peer
->key
);
1950 smap_add(&new, "l3gateway-chassis", chassis_name
);
1953 sbrec_port_binding_set_options(op
->sb
, &new);
1956 sbrec_port_binding_set_parent_port(op
->sb
, NULL
);
1957 sbrec_port_binding_set_tag(op
->sb
, NULL
, 0);
1959 struct ds s
= DS_EMPTY_INITIALIZER
;
1960 ds_put_cstr(&s
, op
->nbrp
->mac
);
1961 for (int i
= 0; i
< op
->nbrp
->n_networks
; ++i
) {
1962 ds_put_format(&s
, " %s", op
->nbrp
->networks
[i
]);
1964 const char *addresses
= ds_cstr(&s
);
1965 sbrec_port_binding_set_mac(op
->sb
, &addresses
, 1);
1968 struct smap ids
= SMAP_INITIALIZER(&ids
);
1969 sbrec_port_binding_set_external_ids(op
->sb
, &ids
);
1971 if (strcmp(op
->nbsp
->type
, "router")) {
1972 uint32_t queue_id
= smap_get_int(
1973 &op
->sb
->options
, "qdisc_queue_id", 0);
1974 bool has_qos
= port_has_qos_params(&op
->nbsp
->options
);
1975 struct smap options
;
1977 if (op
->sb
->chassis
&& has_qos
&& !queue_id
) {
1978 queue_id
= allocate_chassis_queueid(chassis_qdisc_queues
,
1980 } else if (!has_qos
&& queue_id
) {
1981 free_chassis_queueid(chassis_qdisc_queues
,
1987 smap_clone(&options
, &op
->nbsp
->options
);
1989 smap_add_format(&options
,
1990 "qdisc_queue_id", "%d", queue_id
);
1992 sbrec_port_binding_set_options(op
->sb
, &options
);
1993 smap_destroy(&options
);
1994 if (ovn_is_known_nb_lsp_type(op
->nbsp
->type
)) {
1995 sbrec_port_binding_set_type(op
->sb
, op
->nbsp
->type
);
1997 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1999 &rl
, "Unknown port type '%s' set on logical switch '%s'.",
2000 op
->nbsp
->type
, op
->nbsp
->name
);
2003 const char *chassis
= NULL
;
2004 if (op
->peer
&& op
->peer
->od
&& op
->peer
->od
->nbr
) {
2005 chassis
= smap_get(&op
->peer
->od
->nbr
->options
, "chassis");
2008 /* A switch port connected to a gateway router is also of
2009 * type "l3gateway". */
2011 sbrec_port_binding_set_type(op
->sb
, "l3gateway");
2013 sbrec_port_binding_set_type(op
->sb
, "patch");
2016 const char *router_port
= smap_get(&op
->nbsp
->options
,
2018 if (router_port
|| chassis
) {
2022 smap_add(&new, "peer", router_port
);
2025 smap_add(&new, "l3gateway-chassis", chassis
);
2027 sbrec_port_binding_set_options(op
->sb
, &new);
2031 const char *nat_addresses
= smap_get(&op
->nbsp
->options
,
2033 if (nat_addresses
&& !strcmp(nat_addresses
, "router")) {
2034 if (op
->peer
&& op
->peer
->od
2035 && (chassis
|| op
->peer
->od
->l3redirect_port
)) {
2037 char **nats
= get_nat_addresses(op
->peer
, &n_nats
);
2039 sbrec_port_binding_set_nat_addresses(op
->sb
,
2040 (const char **) nats
, n_nats
);
2041 for (size_t i
= 0; i
< n_nats
; i
++) {
2046 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2049 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2051 /* Only accept manual specification of ethernet address
2052 * followed by IPv4 addresses on type "l3gateway" ports. */
2053 } else if (nat_addresses
&& chassis
) {
2054 struct lport_addresses laddrs
;
2055 if (!extract_lsp_addresses(nat_addresses
, &laddrs
)) {
2056 static struct vlog_rate_limit rl
=
2057 VLOG_RATE_LIMIT_INIT(1, 1);
2058 VLOG_WARN_RL(&rl
, "Error extracting nat-addresses.");
2059 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2061 sbrec_port_binding_set_nat_addresses(op
->sb
,
2063 destroy_lport_addresses(&laddrs
);
2066 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2069 sbrec_port_binding_set_parent_port(op
->sb
, op
->nbsp
->parent_name
);
2070 sbrec_port_binding_set_tag(op
->sb
, op
->nbsp
->tag
, op
->nbsp
->n_tag
);
2071 sbrec_port_binding_set_mac(op
->sb
, (const char **) op
->nbsp
->addresses
,
2072 op
->nbsp
->n_addresses
);
2074 struct smap ids
= SMAP_INITIALIZER(&ids
);
2075 smap_clone(&ids
, &op
->nbsp
->external_ids
);
2076 const char *name
= smap_get(&ids
, "neutron:port_name");
2077 if (name
&& name
[0]) {
2078 smap_add(&ids
, "name", name
);
2080 sbrec_port_binding_set_external_ids(op
->sb
, &ids
);
2085 /* Remove mac_binding entries that refer to logical_ports which are
2088 cleanup_mac_bindings(struct northd_context
*ctx
, struct hmap
*ports
)
2090 const struct sbrec_mac_binding
*b
, *n
;
2091 SBREC_MAC_BINDING_FOR_EACH_SAFE (b
, n
, ctx
->ovnsb_idl
) {
2092 if (!ovn_port_find(ports
, b
->logical_port
)) {
2093 sbrec_mac_binding_delete(b
);
2098 /* Updates the southbound Port_Binding table so that it contains the logical
2099 * switch ports specified by the northbound database.
2101 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2102 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2105 build_ports(struct northd_context
*ctx
,
2106 struct ovsdb_idl_index
*sbrec_chassis_by_name
,
2107 struct hmap
*datapaths
, struct hmap
*ports
)
2109 struct ovs_list sb_only
, nb_only
, both
;
2110 struct hmap tag_alloc_table
= HMAP_INITIALIZER(&tag_alloc_table
);
2111 struct hmap chassis_qdisc_queues
= HMAP_INITIALIZER(&chassis_qdisc_queues
);
2113 join_logical_ports(ctx
, datapaths
, ports
, &chassis_qdisc_queues
,
2114 &tag_alloc_table
, &sb_only
, &nb_only
, &both
);
2116 struct ovn_port
*op
, *next
;
2117 /* For logical ports that are in both databases, update the southbound
2118 * record based on northbound data. Also index the in-use tunnel_keys.
2119 * For logical ports that are in NB database, do any tag allocation
2121 LIST_FOR_EACH_SAFE (op
, next
, list
, &both
) {
2123 tag_alloc_create_new_tag(&tag_alloc_table
, op
->nbsp
);
2125 ovn_port_update_sbrec(ctx
, sbrec_chassis_by_name
,
2126 op
, &chassis_qdisc_queues
);
2128 add_tnlid(&op
->od
->port_tnlids
, op
->sb
->tunnel_key
);
2129 if (op
->sb
->tunnel_key
> op
->od
->port_key_hint
) {
2130 op
->od
->port_key_hint
= op
->sb
->tunnel_key
;
2134 /* Add southbound record for each unmatched northbound record. */
2135 LIST_FOR_EACH_SAFE (op
, next
, list
, &nb_only
) {
2136 uint16_t tunnel_key
= ovn_port_allocate_key(op
->od
);
2141 op
->sb
= sbrec_port_binding_insert(ctx
->ovnsb_txn
);
2142 ovn_port_update_sbrec(ctx
, sbrec_chassis_by_name
, op
,
2143 &chassis_qdisc_queues
);
2145 sbrec_port_binding_set_logical_port(op
->sb
, op
->key
);
2146 sbrec_port_binding_set_tunnel_key(op
->sb
, tunnel_key
);
2149 bool remove_mac_bindings
= false;
2150 if (!ovs_list_is_empty(&sb_only
)) {
2151 remove_mac_bindings
= true;
2154 /* Delete southbound records without northbound matches. */
2155 LIST_FOR_EACH_SAFE(op
, next
, list
, &sb_only
) {
2156 ovs_list_remove(&op
->list
);
2157 sbrec_port_binding_delete(op
->sb
);
2158 ovn_port_destroy(ports
, op
);
2160 if (remove_mac_bindings
) {
2161 cleanup_mac_bindings(ctx
, ports
);
2164 tag_alloc_destroy(&tag_alloc_table
);
2165 destroy_chassis_queues(&chassis_qdisc_queues
);
/* OVN reserves the upper half of the tunnel-key space for multicast
 * groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
2183 multicast_group_equal(const struct multicast_group
*a
,
2184 const struct multicast_group
*b
)
2186 return !strcmp(a
->name
, b
->name
) && a
->key
== b
->key
;
2189 /* Multicast group entry. */
2190 struct ovn_multicast
{
2191 struct hmap_node hmap_node
; /* Index on 'datapath' and 'key'. */
2192 struct ovn_datapath
*datapath
;
2193 const struct multicast_group
*group
;
2195 struct ovn_port
**ports
;
2196 size_t n_ports
, allocated_ports
;
2200 ovn_multicast_hash(const struct ovn_datapath
*datapath
,
2201 const struct multicast_group
*group
)
2203 return hash_pointer(datapath
, group
->key
);
2206 static struct ovn_multicast
*
2207 ovn_multicast_find(struct hmap
*mcgroups
, struct ovn_datapath
*datapath
,
2208 const struct multicast_group
*group
)
2210 struct ovn_multicast
*mc
;
2212 HMAP_FOR_EACH_WITH_HASH (mc
, hmap_node
,
2213 ovn_multicast_hash(datapath
, group
), mcgroups
) {
2214 if (mc
->datapath
== datapath
2215 && multicast_group_equal(mc
->group
, group
)) {
2223 ovn_multicast_add(struct hmap
*mcgroups
, const struct multicast_group
*group
,
2224 struct ovn_port
*port
)
2226 struct ovn_datapath
*od
= port
->od
;
2227 struct ovn_multicast
*mc
= ovn_multicast_find(mcgroups
, od
, group
);
2229 mc
= xmalloc(sizeof *mc
);
2230 hmap_insert(mcgroups
, &mc
->hmap_node
, ovn_multicast_hash(od
, group
));
2234 mc
->allocated_ports
= 4;
2235 mc
->ports
= xmalloc(mc
->allocated_ports
* sizeof *mc
->ports
);
2237 if (mc
->n_ports
>= mc
->allocated_ports
) {
2238 mc
->ports
= x2nrealloc(mc
->ports
, &mc
->allocated_ports
,
2241 mc
->ports
[mc
->n_ports
++] = port
;
2245 ovn_multicast_destroy(struct hmap
*mcgroups
, struct ovn_multicast
*mc
)
2248 hmap_remove(mcgroups
, &mc
->hmap_node
);
2255 ovn_multicast_update_sbrec(const struct ovn_multicast
*mc
,
2256 const struct sbrec_multicast_group
*sb
)
2258 struct sbrec_port_binding
**ports
= xmalloc(mc
->n_ports
* sizeof *ports
);
2259 for (size_t i
= 0; i
< mc
->n_ports
; i
++) {
2260 ports
[i
] = CONST_CAST(struct sbrec_port_binding
*, mc
->ports
[i
]->sb
);
2262 sbrec_multicast_group_set_ports(sb
, ports
, mc
->n_ports
);
2266 /* Logical flow generation.
2268 * This code generates the Logical_Flow table in the southbound database, as a
2269 * function of most of the northbound database.
2273 struct hmap_node hmap_node
;
2275 struct ovn_datapath
*od
;
2276 enum ovn_stage stage
;
2285 ovn_lflow_hash(const struct ovn_lflow
*lflow
)
2287 return ovn_logical_flow_hash(&lflow
->od
->sb
->header_
.uuid
,
2288 ovn_stage_get_table(lflow
->stage
),
2289 ovn_stage_get_pipeline_name(lflow
->stage
),
2290 lflow
->priority
, lflow
->match
,
2295 ovn_lflow_equal(const struct ovn_lflow
*a
, const struct ovn_lflow
*b
)
2297 return (a
->od
== b
->od
2298 && a
->stage
== b
->stage
2299 && a
->priority
== b
->priority
2300 && !strcmp(a
->match
, b
->match
)
2301 && !strcmp(a
->actions
, b
->actions
));
2305 ovn_lflow_init(struct ovn_lflow
*lflow
, struct ovn_datapath
*od
,
2306 enum ovn_stage stage
, uint16_t priority
,
2307 char *match
, char *actions
, char *stage_hint
,
2311 lflow
->stage
= stage
;
2312 lflow
->priority
= priority
;
2313 lflow
->match
= match
;
2314 lflow
->actions
= actions
;
2315 lflow
->stage_hint
= stage_hint
;
2316 lflow
->where
= where
;
2319 /* Adds a row with the specified contents to the Logical_Flow table. */
2321 ovn_lflow_add_at(struct hmap
*lflow_map
, struct ovn_datapath
*od
,
2322 enum ovn_stage stage
, uint16_t priority
,
2323 const char *match
, const char *actions
,
2324 const char *stage_hint
, const char *where
)
2326 ovs_assert(ovn_stage_to_datapath_type(stage
) == ovn_datapath_get_type(od
));
2328 struct ovn_lflow
*lflow
= xmalloc(sizeof *lflow
);
2329 ovn_lflow_init(lflow
, od
, stage
, priority
,
2330 xstrdup(match
), xstrdup(actions
),
2331 nullable_xstrdup(stage_hint
), where
);
2332 hmap_insert(lflow_map
, &lflow
->hmap_node
, ovn_lflow_hash(lflow
));
/* Adds a row with the specified contents to the Logical_Flow table.
 * These wrappers capture the call site via OVS_SOURCE_LOCATOR; the
 * hint-less variant passes a NULL stage hint. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                            ACTIONS, NULL)
2345 static struct ovn_lflow
*
2346 ovn_lflow_find(struct hmap
*lflows
, struct ovn_datapath
*od
,
2347 enum ovn_stage stage
, uint16_t priority
,
2348 const char *match
, const char *actions
, uint32_t hash
)
2350 struct ovn_lflow target
;
2351 ovn_lflow_init(&target
, od
, stage
, priority
,
2352 CONST_CAST(char *, match
), CONST_CAST(char *, actions
),
2355 struct ovn_lflow
*lflow
;
2356 HMAP_FOR_EACH_WITH_HASH (lflow
, hmap_node
, hash
, lflows
) {
2357 if (ovn_lflow_equal(lflow
, &target
)) {
2365 ovn_lflow_destroy(struct hmap
*lflows
, struct ovn_lflow
*lflow
)
2368 hmap_remove(lflows
, &lflow
->hmap_node
);
2370 free(lflow
->actions
);
2371 free(lflow
->stage_hint
);
2376 /* Appends port security constraints on L2 address field 'eth_addr_field'
2377 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2378 * elements, is the collection of port_security constraints from an
2379 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2381 build_port_security_l2(const char *eth_addr_field
,
2382 struct lport_addresses
*ps_addrs
,
2383 unsigned int n_ps_addrs
,
2390 ds_put_format(match
, " && %s == {", eth_addr_field
);
2392 for (size_t i
= 0; i
< n_ps_addrs
; i
++) {
2393 ds_put_format(match
, "%s ", ps_addrs
[i
].ea_s
);
2395 ds_chomp(match
, ' ');
2396 ds_put_cstr(match
, "}");
2400 build_port_security_ipv6_nd_flow(
2401 struct ds
*match
, struct eth_addr ea
, struct ipv6_netaddr
*ipv6_addrs
,
2404 ds_put_format(match
, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT
" || "
2405 "nd.sll == "ETH_ADDR_FMT
") || ((nd.tll == "ETH_ADDR_FMT
" || "
2406 "nd.tll == "ETH_ADDR_FMT
")", ETH_ADDR_ARGS(eth_addr_zero
),
2407 ETH_ADDR_ARGS(ea
), ETH_ADDR_ARGS(eth_addr_zero
),
2409 if (!n_ipv6_addrs
) {
2410 ds_put_cstr(match
, "))");
2414 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
2415 struct in6_addr lla
;
2416 in6_generate_lla(ea
, &lla
);
2417 memset(ip6_str
, 0, sizeof(ip6_str
));
2418 ipv6_string_mapped(ip6_str
, &lla
);
2419 ds_put_format(match
, " && (nd.target == %s", ip6_str
);
2421 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
2422 memset(ip6_str
, 0, sizeof(ip6_str
));
2423 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
2424 ds_put_format(match
, " || nd.target == %s", ip6_str
);
2427 ds_put_format(match
, ")))");
2431 build_port_security_ipv6_flow(
2432 enum ovn_pipeline pipeline
, struct ds
*match
, struct eth_addr ea
,
2433 struct ipv6_netaddr
*ipv6_addrs
, int n_ipv6_addrs
)
2435 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
2437 ds_put_format(match
, " && %s == {",
2438 pipeline
== P_IN
? "ip6.src" : "ip6.dst");
2440 /* Allow link-local address. */
2441 struct in6_addr lla
;
2442 in6_generate_lla(ea
, &lla
);
2443 ipv6_string_mapped(ip6_str
, &lla
);
2444 ds_put_format(match
, "%s, ", ip6_str
);
2446 /* Allow ip6.dst=ff00::/8 for multicast packets */
2447 if (pipeline
== P_OUT
) {
2448 ds_put_cstr(match
, "ff00::/8, ");
2450 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
2451 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
2452 ds_put_format(match
, "%s, ", ip6_str
);
2454 /* Replace ", " by "}". */
2455 ds_chomp(match
, ' ');
2456 ds_chomp(match
, ',');
2457 ds_put_cstr(match
, "}");
2461 * Build port security constraints on ARP and IPv6 ND fields
2462 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2464 * For each port security of the logical port, following
2465 * logical flows are added
2466 * - If the port security has no IP (both IPv4 and IPv6) or
2467 * if it has IPv4 address(es)
2468 * - Priority 90 flow to allow ARP packets for known MAC addresses
2469 * in the eth.src and arp.spa fields. If the port security
2470 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2472 * - If the port security has no IP (both IPv4 and IPv6) or
2473 * if it has IPv6 address(es)
2474 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2475 * in the eth.src and nd.sll/nd.tll fields. If the port security
2476 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2477 * for IPv6 Neighbor Advertisement packet.
2479 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2482 build_port_security_nd(struct ovn_port
*op
, struct hmap
*lflows
)
2484 struct ds match
= DS_EMPTY_INITIALIZER
;
2486 for (size_t i
= 0; i
< op
->n_ps_addrs
; i
++) {
2487 struct lport_addresses
*ps
= &op
->ps_addrs
[i
];
2489 bool no_ip
= !(ps
->n_ipv4_addrs
|| ps
->n_ipv6_addrs
);
2492 if (ps
->n_ipv4_addrs
|| no_ip
) {
2493 ds_put_format(&match
,
2494 "inport == %s && eth.src == %s && arp.sha == %s",
2495 op
->json_key
, ps
->ea_s
, ps
->ea_s
);
2497 if (ps
->n_ipv4_addrs
) {
2498 ds_put_cstr(&match
, " && arp.spa == {");
2499 for (size_t j
= 0; j
< ps
->n_ipv4_addrs
; j
++) {
2500 /* When the netmask is applied, if the host portion is
2501 * non-zero, the host can only use the specified
2502 * address in the arp.spa. If zero, the host is allowed
2503 * to use any address in the subnet. */
2504 if (ps
->ipv4_addrs
[j
].plen
== 32
2505 || ps
->ipv4_addrs
[j
].addr
& ~ps
->ipv4_addrs
[j
].mask
) {
2506 ds_put_cstr(&match
, ps
->ipv4_addrs
[j
].addr_s
);
2508 ds_put_format(&match
, "%s/%d",
2509 ps
->ipv4_addrs
[j
].network_s
,
2510 ps
->ipv4_addrs
[j
].plen
);
2512 ds_put_cstr(&match
, ", ");
2514 ds_chomp(&match
, ' ');
2515 ds_chomp(&match
, ',');
2516 ds_put_cstr(&match
, "}");
2518 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
2519 ds_cstr(&match
), "next;");
2522 if (ps
->n_ipv6_addrs
|| no_ip
) {
2524 ds_put_format(&match
, "inport == %s && eth.src == %s",
2525 op
->json_key
, ps
->ea_s
);
2526 build_port_security_ipv6_nd_flow(&match
, ps
->ea
, ps
->ipv6_addrs
,
2528 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
2529 ds_cstr(&match
), "next;");
2534 ds_put_format(&match
, "inport == %s && (arp || nd)", op
->json_key
);
2535 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 80,
2536 ds_cstr(&match
), "drop;");
2541 * Build port security constraints on IPv4 and IPv6 src and dst fields
2542 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2544 * For each port security of the logical port, following
2545 * logical flows are added
2546 * - If the port security has IPv4 addresses,
2547 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2549 * - If the port security has IPv6 addresses,
2550 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2552 * - If the port security has IPv4 addresses or IPv6 addresses or both
2553 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2556 build_port_security_ip(enum ovn_pipeline pipeline
, struct ovn_port
*op
,
2557 struct hmap
*lflows
)
2559 char *port_direction
;
2560 enum ovn_stage stage
;
2561 if (pipeline
== P_IN
) {
2562 port_direction
= "inport";
2563 stage
= S_SWITCH_IN_PORT_SEC_IP
;
2565 port_direction
= "outport";
2566 stage
= S_SWITCH_OUT_PORT_SEC_IP
;
2569 for (size_t i
= 0; i
< op
->n_ps_addrs
; i
++) {
2570 struct lport_addresses
*ps
= &op
->ps_addrs
[i
];
2572 if (!(ps
->n_ipv4_addrs
|| ps
->n_ipv6_addrs
)) {
2576 if (ps
->n_ipv4_addrs
) {
2577 struct ds match
= DS_EMPTY_INITIALIZER
;
2578 if (pipeline
== P_IN
) {
2579 /* Permit use of the unspecified address for DHCP discovery */
2580 struct ds dhcp_match
= DS_EMPTY_INITIALIZER
;
2581 ds_put_format(&dhcp_match
, "inport == %s"
2583 " && ip4.src == 0.0.0.0"
2584 " && ip4.dst == 255.255.255.255"
2585 " && udp.src == 68 && udp.dst == 67",
2586 op
->json_key
, ps
->ea_s
);
2587 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
2588 ds_cstr(&dhcp_match
), "next;");
2589 ds_destroy(&dhcp_match
);
2590 ds_put_format(&match
, "inport == %s && eth.src == %s"
2591 " && ip4.src == {", op
->json_key
,
2594 ds_put_format(&match
, "outport == %s && eth.dst == %s"
2595 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2596 op
->json_key
, ps
->ea_s
);
2599 for (int j
= 0; j
< ps
->n_ipv4_addrs
; j
++) {
2600 ovs_be32 mask
= ps
->ipv4_addrs
[j
].mask
;
2601 /* When the netmask is applied, if the host portion is
2602 * non-zero, the host can only use the specified
2603 * address. If zero, the host is allowed to use any
2604 * address in the subnet.
2606 if (ps
->ipv4_addrs
[j
].plen
== 32
2607 || ps
->ipv4_addrs
[j
].addr
& ~mask
) {
2608 ds_put_format(&match
, "%s", ps
->ipv4_addrs
[j
].addr_s
);
2609 if (pipeline
== P_OUT
&& ps
->ipv4_addrs
[j
].plen
!= 32) {
2610 /* Host is also allowed to receive packets to the
2611 * broadcast address in the specified subnet. */
2612 ds_put_format(&match
, ", %s",
2613 ps
->ipv4_addrs
[j
].bcast_s
);
2616 /* host portion is zero */
2617 ds_put_format(&match
, "%s/%d", ps
->ipv4_addrs
[j
].network_s
,
2618 ps
->ipv4_addrs
[j
].plen
);
2620 ds_put_cstr(&match
, ", ");
2623 /* Replace ", " by "}". */
2624 ds_chomp(&match
, ' ');
2625 ds_chomp(&match
, ',');
2626 ds_put_cstr(&match
, "}");
2627 ovn_lflow_add(lflows
, op
->od
, stage
, 90, ds_cstr(&match
), "next;");
2631 if (ps
->n_ipv6_addrs
) {
2632 struct ds match
= DS_EMPTY_INITIALIZER
;
2633 if (pipeline
== P_IN
) {
2634 /* Permit use of unspecified address for duplicate address
2636 struct ds dad_match
= DS_EMPTY_INITIALIZER
;
2637 ds_put_format(&dad_match
, "inport == %s"
2640 " && ip6.dst == ff02::/16"
2641 " && icmp6.type == {131, 135, 143}", op
->json_key
,
2643 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
2644 ds_cstr(&dad_match
), "next;");
2645 ds_destroy(&dad_match
);
2647 ds_put_format(&match
, "%s == %s && %s == %s",
2648 port_direction
, op
->json_key
,
2649 pipeline
== P_IN
? "eth.src" : "eth.dst", ps
->ea_s
);
2650 build_port_security_ipv6_flow(pipeline
, &match
, ps
->ea
,
2651 ps
->ipv6_addrs
, ps
->n_ipv6_addrs
);
2652 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
2653 ds_cstr(&match
), "next;");
2657 char *match
= xasprintf("%s == %s && %s == %s && ip",
2658 port_direction
, op
->json_key
,
2659 pipeline
== P_IN
? "eth.src" : "eth.dst",
2661 ovn_lflow_add(lflows
, op
->od
, stage
, 80, match
, "drop;");
2668 lsp_is_enabled(const struct nbrec_logical_switch_port
*lsp
)
2670 return !lsp
->enabled
|| *lsp
->enabled
;
2674 lsp_is_up(const struct nbrec_logical_switch_port
*lsp
)
2676 return !lsp
->up
|| *lsp
->up
;
2680 build_dhcpv4_action(struct ovn_port
*op
, ovs_be32 offer_ip
,
2681 struct ds
*options_action
, struct ds
*response_action
,
2682 struct ds
*ipv4_addr_match
)
2684 if (!op
->nbsp
->dhcpv4_options
) {
2685 /* CMS has disabled native DHCPv4 for this lport. */
2689 ovs_be32 host_ip
, mask
;
2690 char *error
= ip_parse_masked(op
->nbsp
->dhcpv4_options
->cidr
, &host_ip
,
2692 if (error
|| ((offer_ip
^ host_ip
) & mask
)) {
2694 * - cidr defined is invalid or
2695 * - the offer ip of the logical port doesn't belong to the cidr
2696 * defined in the DHCPv4 options.
2702 const char *server_ip
= smap_get(
2703 &op
->nbsp
->dhcpv4_options
->options
, "server_id");
2704 const char *server_mac
= smap_get(
2705 &op
->nbsp
->dhcpv4_options
->options
, "server_mac");
2706 const char *lease_time
= smap_get(
2707 &op
->nbsp
->dhcpv4_options
->options
, "lease_time");
2709 if (!(server_ip
&& server_mac
&& lease_time
)) {
2710 /* "server_id", "server_mac" and "lease_time" should be
2711 * present in the dhcp_options. */
2712 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2713 VLOG_WARN_RL(&rl
, "Required DHCPv4 options not defined for lport - %s",
2718 struct smap dhcpv4_options
= SMAP_INITIALIZER(&dhcpv4_options
);
2719 smap_clone(&dhcpv4_options
, &op
->nbsp
->dhcpv4_options
->options
);
2721 /* server_mac is not DHCPv4 option, delete it from the smap. */
2722 smap_remove(&dhcpv4_options
, "server_mac");
2723 char *netmask
= xasprintf(IP_FMT
, IP_ARGS(mask
));
2724 smap_add(&dhcpv4_options
, "netmask", netmask
);
2727 ds_put_format(options_action
,
2728 REGBIT_DHCP_OPTS_RESULT
" = put_dhcp_opts(offerip = "
2729 IP_FMT
", ", IP_ARGS(offer_ip
));
2731 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2732 * options on different architectures (big or little endian, SSE4.2) */
2733 const struct smap_node
**sorted_opts
= smap_sort(&dhcpv4_options
);
2734 for (size_t i
= 0; i
< smap_count(&dhcpv4_options
); i
++) {
2735 const struct smap_node
*node
= sorted_opts
[i
];
2736 ds_put_format(options_action
, "%s = %s, ", node
->key
, node
->value
);
2740 ds_chomp(options_action
, ' ');
2741 ds_chomp(options_action
, ',');
2742 ds_put_cstr(options_action
, "); next;");
2744 ds_put_format(response_action
, "eth.dst = eth.src; eth.src = %s; "
2745 "ip4.dst = "IP_FMT
"; ip4.src = %s; udp.src = 67; "
2746 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2748 server_mac
, IP_ARGS(offer_ip
), server_ip
);
2750 ds_put_format(ipv4_addr_match
,
2751 "ip4.src == "IP_FMT
" && ip4.dst == {%s, 255.255.255.255}",
2752 IP_ARGS(offer_ip
), server_ip
);
2753 smap_destroy(&dhcpv4_options
);
2758 build_dhcpv6_action(struct ovn_port
*op
, struct in6_addr
*offer_ip
,
2759 struct ds
*options_action
, struct ds
*response_action
)
2761 if (!op
->nbsp
->dhcpv6_options
) {
2762 /* CMS has disabled native DHCPv6 for this lport. */
2766 struct in6_addr host_ip
, mask
;
2768 char *error
= ipv6_parse_masked(op
->nbsp
->dhcpv6_options
->cidr
, &host_ip
,
2774 struct in6_addr ip6_mask
= ipv6_addr_bitxor(offer_ip
, &host_ip
);
2775 ip6_mask
= ipv6_addr_bitand(&ip6_mask
, &mask
);
2776 if (!ipv6_mask_is_any(&ip6_mask
)) {
2777 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2782 const struct smap
*options_map
= &op
->nbsp
->dhcpv6_options
->options
;
2783 /* "server_id" should be the MAC address. */
2784 const char *server_mac
= smap_get(options_map
, "server_id");
2786 if (!server_mac
|| !eth_addr_from_string(server_mac
, &ea
)) {
2787 /* "server_id" should be present in the dhcpv6_options. */
2788 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
2789 VLOG_WARN_RL(&rl
, "server_id not present in the DHCPv6 options"
2790 " for lport %s", op
->json_key
);
2794 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2795 struct in6_addr lla
;
2796 in6_generate_lla(ea
, &lla
);
2798 char server_ip
[INET6_ADDRSTRLEN
+ 1];
2799 ipv6_string_mapped(server_ip
, &lla
);
2801 char ia_addr
[INET6_ADDRSTRLEN
+ 1];
2802 ipv6_string_mapped(ia_addr
, offer_ip
);
2804 ds_put_format(options_action
,
2805 REGBIT_DHCP_OPTS_RESULT
" = put_dhcpv6_opts(");
2807 /* Check whether the dhcpv6 options should be configured as stateful.
2808 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2809 if (!smap_get_bool(options_map
, "dhcpv6_stateless", false)) {
2810 ipv6_string_mapped(ia_addr
, offer_ip
);
2811 ds_put_format(options_action
, "ia_addr = %s, ", ia_addr
);
2814 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2815 * options on different architectures (big or little endian, SSE4.2) */
2816 const struct smap_node
**sorted_opts
= smap_sort(options_map
);
2817 for (size_t i
= 0; i
< smap_count(options_map
); i
++) {
2818 const struct smap_node
*node
= sorted_opts
[i
];
2819 if (strcmp(node
->key
, "dhcpv6_stateless")) {
2820 ds_put_format(options_action
, "%s = %s, ", node
->key
, node
->value
);
2825 ds_chomp(options_action
, ' ');
2826 ds_chomp(options_action
, ',');
2827 ds_put_cstr(options_action
, "); next;");
2829 ds_put_format(response_action
, "eth.dst = eth.src; eth.src = %s; "
2830 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2831 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2833 server_mac
, server_ip
);
2839 has_stateful_acl(struct ovn_datapath
*od
)
2841 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
2842 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
2843 if (!strcmp(acl
->action
, "allow-related")) {
2852 build_pre_acls(struct ovn_datapath
*od
, struct hmap
*lflows
)
2854 bool has_stateful
= has_stateful_acl(od
);
2856 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2857 * allowed by default. */
2858 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 0, "1", "next;");
2859 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 0, "1", "next;");
2861 /* If there are any stateful ACL rules in this datapath, we must
2862 * send all IP packets through the conntrack action, which handles
2863 * defragmentation, in order to match L4 headers. */
2865 for (size_t i
= 0; i
< od
->n_router_ports
; i
++) {
2866 struct ovn_port
*op
= od
->router_ports
[i
];
2867 /* Can't use ct() for router ports. Consider the
2868 * following configuration: lp1(10.0.0.2) on
2869 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2870 * ping from lp1 to lp2, First, the response will go
2871 * through ct() with a zone for lp2 in the ls2 ingress
2872 * pipeline on hostB. That ct zone knows about this
2873 * connection. Next, it goes through ct() with the zone
2874 * for the router port in the egress pipeline of ls2 on
2875 * hostB. This zone does not know about the connection,
2876 * as the icmp request went through the logical router
2877 * on hostA, not hostB. This would only work with
2878 * distributed conntrack state across all chassis. */
2879 struct ds match_in
= DS_EMPTY_INITIALIZER
;
2880 struct ds match_out
= DS_EMPTY_INITIALIZER
;
2882 ds_put_format(&match_in
, "ip && inport == %s", op
->json_key
);
2883 ds_put_format(&match_out
, "ip && outport == %s", op
->json_key
);
2884 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110,
2885 ds_cstr(&match_in
), "next;");
2886 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
2887 ds_cstr(&match_out
), "next;");
2889 ds_destroy(&match_in
);
2890 ds_destroy(&match_out
);
2892 if (od
->localnet_port
) {
2893 struct ds match_in
= DS_EMPTY_INITIALIZER
;
2894 struct ds match_out
= DS_EMPTY_INITIALIZER
;
2896 ds_put_format(&match_in
, "ip && inport == %s",
2897 od
->localnet_port
->json_key
);
2898 ds_put_format(&match_out
, "ip && outport == %s",
2899 od
->localnet_port
->json_key
);
2900 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110,
2901 ds_cstr(&match_in
), "next;");
2902 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
2903 ds_cstr(&match_out
), "next;");
2905 ds_destroy(&match_in
);
2906 ds_destroy(&match_out
);
2909 /* Ingress and Egress Pre-ACL Table (Priority 110).
2911 * Not to do conntrack on ND and ICMP destination
2912 * unreachable packets. */
2913 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110,
2914 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
2915 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
2917 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
2918 "nd || nd_rs || nd_ra || icmp4.type == 3 || "
2919 "icmp6.type == 1 || (tcp && tcp.flags == 4)",
2922 /* Ingress and Egress Pre-ACL Table (Priority 100).
2924 * Regardless of whether the ACL is "from-lport" or "to-lport",
2925 * we need rules in both the ingress and egress table, because
2926 * the return traffic needs to be followed.
2928 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2929 * it to conntrack for tracking and defragmentation. */
2930 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 100, "ip",
2931 REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
2932 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 100, "ip",
2933 REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
2937 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2938 * 'ip_address'. The caller must free() the memory allocated for
2941 ip_address_and_port_from_lb_key(const char *key
, char **ip_address
,
2942 uint16_t *port
, int *addr_family
)
2944 struct sockaddr_storage ss
;
2945 if (!inet_parse_active(key
, 0, &ss
)) {
2946 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
2947 VLOG_WARN_RL(&rl
, "bad ip address or port for load balancer key %s",
2952 struct ds s
= DS_EMPTY_INITIALIZER
;
2953 ss_format_address_nobracks(&ss
, &s
);
2954 *ip_address
= ds_steal_cstr(&s
);
2956 *port
= ss_get_port(&ss
);
2958 *addr_family
= ss
.ss_family
;
2962 * Returns true if logical switch is configured with DNS records, false
2966 ls_has_dns_records(const struct nbrec_logical_switch
*nbs
)
2968 for (size_t i
= 0; i
< nbs
->n_dns_records
; i
++) {
2969 if (!smap_is_empty(&nbs
->dns_records
[i
]->records
)) {
2978 build_pre_lb(struct ovn_datapath
*od
, struct hmap
*lflows
)
2980 /* Do not send ND packets to conntrack */
2981 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_LB
, 110,
2982 "nd || nd_rs || nd_ra", "next;");
2983 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_LB
, 110,
2984 "nd || nd_rs || nd_ra", "next;");
2986 /* Allow all packets to go to next tables by default. */
2987 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_LB
, 0, "1", "next;");
2988 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_LB
, 0, "1", "next;");
2990 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
2991 bool vip_configured
= false;
2992 int addr_family
= AF_INET
;
2993 for (int i
= 0; i
< od
->nbs
->n_load_balancer
; i
++) {
2994 struct nbrec_load_balancer
*lb
= od
->nbs
->load_balancer
[i
];
2995 struct smap
*vips
= &lb
->vips
;
2996 struct smap_node
*node
;
2998 SMAP_FOR_EACH (node
, vips
) {
2999 vip_configured
= true;
3001 /* node->key contains IP:port or just IP. */
3002 char *ip_address
= NULL
;
3004 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
3010 if (!sset_contains(&all_ips
, ip_address
)) {
3011 sset_add(&all_ips
, ip_address
);
3016 /* Ignore L4 port information in the key because fragmented packets
3017 * may not have L4 information. The pre-stateful table will send
3018 * the packet through ct() action to de-fragment. In stateful
3019 * table, we will eventually look at L4 information. */
3023 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3024 * packet to conntrack for defragmentation. */
3025 const char *ip_address
;
3026 SSET_FOR_EACH(ip_address
, &all_ips
) {
3029 if (addr_family
== AF_INET
) {
3030 match
= xasprintf("ip && ip4.dst == %s", ip_address
);
3032 match
= xasprintf("ip && ip6.dst == %s", ip_address
);
3034 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_LB
,
3035 100, match
, REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
3039 sset_destroy(&all_ips
);
3041 if (vip_configured
) {
3042 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_LB
,
3043 100, "ip", REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
3048 build_pre_stateful(struct ovn_datapath
*od
, struct hmap
*lflows
)
3050 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3051 * allowed by default. */
3052 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_STATEFUL
, 0, "1", "next;");
3053 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_STATEFUL
, 0, "1", "next;");
3055 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3056 * sent to conntrack for tracking and defragmentation. */
3057 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_STATEFUL
, 100,
3058 REGBIT_CONNTRACK_DEFRAG
" == 1", "ct_next;");
3059 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_STATEFUL
, 100,
3060 REGBIT_CONNTRACK_DEFRAG
" == 1", "ct_next;");
3064 build_acl_log(struct ds
*actions
, const struct nbrec_acl
*acl
)
3070 ds_put_cstr(actions
, "log(");
3073 ds_put_format(actions
, "name=\"%s\", ", acl
->name
);
3076 /* If a severity level isn't specified, default to "info". */
3077 if (acl
->severity
) {
3078 ds_put_format(actions
, "severity=%s, ", acl
->severity
);
3080 ds_put_format(actions
, "severity=info, ");
3083 if (!strcmp(acl
->action
, "drop")) {
3084 ds_put_cstr(actions
, "verdict=drop, ");
3085 } else if (!strcmp(acl
->action
, "reject")) {
3086 ds_put_cstr(actions
, "verdict=reject, ");
3087 } else if (!strcmp(acl
->action
, "allow")
3088 || !strcmp(acl
->action
, "allow-related")) {
3089 ds_put_cstr(actions
, "verdict=allow, ");
3092 ds_chomp(actions
, ' ');
3093 ds_chomp(actions
, ',');
3094 ds_put_cstr(actions
, "); ");
3098 build_reject_acl_rules(struct ovn_datapath
*od
, struct hmap
*lflows
,
3099 enum ovn_stage stage
, struct nbrec_acl
*acl
,
3100 struct ds
*extra_match
, struct ds
*extra_actions
)
3102 struct ds match
= DS_EMPTY_INITIALIZER
;
3103 struct ds actions
= DS_EMPTY_INITIALIZER
;
3104 bool ingress
= (stage
== S_SWITCH_IN_ACL
);
3107 build_acl_log(&actions
, acl
);
3108 if (extra_match
->length
> 0) {
3109 ds_put_format(&match
, "(%s) && ", extra_match
->string
);
3111 ds_put_format(&match
, "ip4 && tcp && (%s)", acl
->match
);
3112 ds_put_format(&actions
, "reg0 = 0; "
3113 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3114 "tcp_reset { outport <-> inport; %s };",
3115 ingress
? "output;" : "next(pipeline=ingress,table=0);");
3116 ovn_lflow_add(lflows
, od
, stage
, acl
->priority
+ OVN_ACL_PRI_OFFSET
+ 10,
3117 ds_cstr(&match
), ds_cstr(&actions
));
3120 build_acl_log(&actions
, acl
);
3121 if (extra_match
->length
> 0) {
3122 ds_put_format(&match
, "(%s) && ", extra_match
->string
);
3124 ds_put_format(&match
, "ip6 && tcp && (%s)", acl
->match
);
3125 ds_put_format(&actions
, "reg0 = 0; "
3126 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3127 "tcp_reset { outport <-> inport; %s };",
3128 ingress
? "output;" : "next(pipeline=ingress,table=0);");
3129 ovn_lflow_add(lflows
, od
, stage
, acl
->priority
+ OVN_ACL_PRI_OFFSET
+ 10,
3130 ds_cstr(&match
), ds_cstr(&actions
));
3135 build_acl_log(&actions
, acl
);
3136 if (extra_match
->length
> 0) {
3137 ds_put_format(&match
, "(%s) && ", extra_match
->string
);
3139 ds_put_format(&match
, "ip4 && (%s)", acl
->match
);
3140 if (extra_actions
->length
> 0) {
3141 ds_put_format(&actions
, "%s ", extra_actions
->string
);
3143 ds_put_format(&actions
, "reg0 = 0; "
3144 "eth.dst <-> eth.src; ip4.dst <-> ip4.src; "
3145 "icmp4 { outport <-> inport; %s };",
3146 ingress
? "output;" : "next(pipeline=ingress,table=0);");
3147 ovn_lflow_add(lflows
, od
, stage
, acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3148 ds_cstr(&match
), ds_cstr(&actions
));
3151 build_acl_log(&actions
, acl
);
3152 if (extra_match
->length
> 0) {
3153 ds_put_format(&match
, "(%s) && ", extra_match
->string
);
3155 ds_put_format(&match
, "ip6 && (%s)", acl
->match
);
3156 if (extra_actions
->length
> 0) {
3157 ds_put_format(&actions
, "%s ", extra_actions
->string
);
3159 ds_put_format(&actions
, "reg0 = 0; icmp6 { "
3160 "eth.dst <-> eth.src; ip6.dst <-> ip6.src; "
3161 "outport <-> inport; %s };",
3162 ingress
? "output;" : "next(pipeline=ingress,table=0);");
3163 ovn_lflow_add(lflows
, od
, stage
, acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3164 ds_cstr(&match
), ds_cstr(&actions
));
3167 ds_destroy(&actions
);
3171 consider_acl(struct hmap
*lflows
, struct ovn_datapath
*od
,
3172 struct nbrec_acl
*acl
, bool has_stateful
)
3174 bool ingress
= !strcmp(acl
->direction
, "from-lport") ? true :false;
3175 enum ovn_stage stage
= ingress
? S_SWITCH_IN_ACL
: S_SWITCH_OUT_ACL
;
3177 char *stage_hint
= xasprintf("%08x", acl
->header_
.uuid
.parts
[0]);
3178 if (!strcmp(acl
->action
, "allow")
3179 || !strcmp(acl
->action
, "allow-related")) {
3180 /* If there are any stateful flows, we must even commit "allow"
3181 * actions. This is because, while the initiater's
3182 * direction may not have any stateful rules, the server's
3183 * may and then its return traffic would not have an
3184 * associated conntrack entry and would return "+invalid". */
3185 if (!has_stateful
) {
3186 struct ds actions
= DS_EMPTY_INITIALIZER
;
3187 build_acl_log(&actions
, acl
);
3188 ds_put_cstr(&actions
, "next;");
3189 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3190 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3191 acl
->match
, ds_cstr(&actions
),
3193 ds_destroy(&actions
);
3195 struct ds match
= DS_EMPTY_INITIALIZER
;
3196 struct ds actions
= DS_EMPTY_INITIALIZER
;
3198 /* Commit the connection tracking entry if it's a new
3199 * connection that matches this ACL. After this commit,
3200 * the reply traffic is allowed by a flow we create at
3201 * priority 65535, defined earlier.
3203 * It's also possible that a known connection was marked for
3204 * deletion after a policy was deleted, but the policy was
3205 * re-added while that connection is still known. We catch
3206 * that case here and un-set ct_label.blocked (which will be done
3207 * by ct_commit in the "stateful" stage) to indicate that the
3208 * connection should be allowed to resume.
3210 ds_put_format(&match
, "((ct.new && !ct.est)"
3211 " || (!ct.new && ct.est && !ct.rpl "
3212 "&& ct_label.blocked == 1)) "
3213 "&& (%s)", acl
->match
);
3214 ds_put_cstr(&actions
, REGBIT_CONNTRACK_COMMIT
" = 1; ");
3215 build_acl_log(&actions
, acl
);
3216 ds_put_cstr(&actions
, "next;");
3217 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3218 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3223 /* Match on traffic in the request direction for an established
3224 * connection tracking entry that has not been marked for
3225 * deletion. There is no need to commit here, so we can just
3226 * proceed to the next table. We use this to ensure that this
3227 * connection is still allowed by the currently defined
3231 ds_put_format(&match
,
3232 "!ct.new && ct.est && !ct.rpl"
3233 " && ct_label.blocked == 0 && (%s)",
3236 build_acl_log(&actions
, acl
);
3237 ds_put_cstr(&actions
, "next;");
3238 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3239 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3240 ds_cstr(&match
), ds_cstr(&actions
),
3244 ds_destroy(&actions
);
3246 } else if (!strcmp(acl
->action
, "drop")
3247 || !strcmp(acl
->action
, "reject")) {
3248 struct ds match
= DS_EMPTY_INITIALIZER
;
3249 struct ds actions
= DS_EMPTY_INITIALIZER
;
3251 /* The implementation of "drop" differs if stateful ACLs are in
3252 * use for this datapath. In that case, the actions differ
3253 * depending on whether the connection was previously committed
3254 * to the connection tracker with ct_commit. */
3256 /* If the packet is not part of an established connection, then
3257 * we can simply reject/drop it. */
3259 "(!ct.est || (ct.est && ct_label.blocked == 1))");
3260 if (!strcmp(acl
->action
, "reject")) {
3261 build_reject_acl_rules(od
, lflows
, stage
, acl
, &match
,
3264 ds_put_format(&match
, " && (%s)", acl
->match
);
3265 build_acl_log(&actions
, acl
);
3266 ds_put_cstr(&actions
, "/* drop */");
3267 ovn_lflow_add(lflows
, od
, stage
,
3268 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3269 ds_cstr(&match
), ds_cstr(&actions
));
3271 /* For an existing connection without ct_label set, we've
3272 * encountered a policy change. ACLs previously allowed
3273 * this connection and we committed the connection tracking
3274 * entry. Current policy says that we should drop this
3275 * connection. First, we set bit 0 of ct_label to indicate
3276 * that this connection is set for deletion. By not
3277 * specifying "next;", we implicitly drop the packet after
3278 * updating conntrack state. We would normally defer
3279 * ct_commit() to the "stateful" stage, but since we're
3280 * rejecting/dropping the packet, we go ahead and do it here.
3284 ds_put_cstr(&match
, "ct.est && ct_label.blocked == 0");
3285 ds_put_cstr(&actions
, "ct_commit(ct_label=1/1); ");
3286 if (!strcmp(acl
->action
, "reject")) {
3287 build_reject_acl_rules(od
, lflows
, stage
, acl
, &match
,
3290 ds_put_format(&match
, " && (%s)", acl
->match
);
3291 build_acl_log(&actions
, acl
);
3292 ds_put_cstr(&actions
, "/* drop */");
3293 ovn_lflow_add(lflows
, od
, stage
,
3294 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3295 ds_cstr(&match
), ds_cstr(&actions
));
3298 /* There are no stateful ACLs in use on this datapath,
3299 * so a "reject/drop" ACL is simply the "reject/drop"
3300 * logical flow action in all cases. */
3301 if (!strcmp(acl
->action
, "reject")) {
3302 build_reject_acl_rules(od
, lflows
, stage
, acl
, &match
,
3305 build_acl_log(&actions
, acl
);
3306 ds_put_cstr(&actions
, "/* drop */");
3307 ovn_lflow_add(lflows
, od
, stage
,
3308 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3309 acl
->match
, ds_cstr(&actions
));
3313 ds_destroy(&actions
);
3318 struct ovn_port_group_ls
{
3319 struct hmap_node key_node
; /* Index on 'key'. */
3320 struct uuid key
; /* nb_ls->header_.uuid. */
3321 const struct nbrec_logical_switch
*nb_ls
;
3324 struct ovn_port_group
{
3325 struct hmap_node key_node
; /* Index on 'key'. */
3326 struct uuid key
; /* nb_pg->header_.uuid. */
3327 const struct nbrec_port_group
*nb_pg
;
3328 struct hmap nb_lswitches
; /* NB lswitches related to the port group */
3329 size_t n_acls
; /* Number of ACLs applied to the port group */
3330 struct nbrec_acl
**acls
; /* ACLs applied to the port group */
3333 static struct ovn_port_group
*
3334 ovn_port_group_create(struct hmap
*pgs
,
3335 const struct nbrec_port_group
*nb_pg
)
3337 struct ovn_port_group
*pg
= xzalloc(sizeof *pg
);
3338 pg
->key
= nb_pg
->header_
.uuid
;
3340 pg
->n_acls
= nb_pg
->n_acls
;
3341 pg
->acls
= nb_pg
->acls
;
3342 hmap_init(&pg
->nb_lswitches
);
3343 hmap_insert(pgs
, &pg
->key_node
, uuid_hash(&pg
->key
));
3348 ovn_port_group_ls_add(struct ovn_port_group
*pg
,
3349 const struct nbrec_logical_switch
*nb_ls
)
3351 struct ovn_port_group_ls
*pg_ls
= xzalloc(sizeof *pg_ls
);
3352 pg_ls
->key
= nb_ls
->header_
.uuid
;
3353 pg_ls
->nb_ls
= nb_ls
;
3354 hmap_insert(&pg
->nb_lswitches
, &pg_ls
->key_node
, uuid_hash(&pg_ls
->key
));
3357 static struct ovn_port_group_ls
*
3358 ovn_port_group_ls_find(struct ovn_port_group
*pg
, const struct uuid
*ls_uuid
)
3360 struct ovn_port_group_ls
*pg_ls
;
3362 HMAP_FOR_EACH_WITH_HASH (pg_ls
, key_node
, uuid_hash(ls_uuid
),
3363 &pg
->nb_lswitches
) {
3364 if (uuid_equals(ls_uuid
, &pg_ls
->key
)) {
3372 ovn_port_group_destroy(struct hmap
*pgs
, struct ovn_port_group
*pg
)
3375 hmap_remove(pgs
, &pg
->key_node
);
3376 struct ovn_port_group_ls
*ls
;
3377 HMAP_FOR_EACH_POP (ls
, key_node
, &pg
->nb_lswitches
) {
3380 hmap_destroy(&pg
->nb_lswitches
);
3386 build_port_group_lswitches(struct northd_context
*ctx
, struct hmap
*pgs
,
3391 const struct nbrec_port_group
*nb_pg
;
3392 NBREC_PORT_GROUP_FOR_EACH (nb_pg
, ctx
->ovnnb_idl
) {
3393 struct ovn_port_group
*pg
= ovn_port_group_create(pgs
, nb_pg
);
3394 for (size_t i
= 0; i
< nb_pg
->n_ports
; i
++) {
3395 struct ovn_port
*op
= ovn_port_find(ports
, nb_pg
->ports
[i
]->name
);
3397 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
3398 VLOG_ERR_RL(&rl
, "lport %s in port group %s not found.",
3399 nb_pg
->ports
[i
]->name
,
3405 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
3406 VLOG_WARN_RL(&rl
, "lport %s in port group %s has no lswitch.",
3407 nb_pg
->ports
[i
]->name
,
3412 struct ovn_port_group_ls
*pg_ls
=
3413 ovn_port_group_ls_find(pg
, &op
->od
->nbs
->header_
.uuid
);
3415 ovn_port_group_ls_add(pg
, op
->od
->nbs
);
3422 build_acls(struct ovn_datapath
*od
, struct hmap
*lflows
,
3423 struct hmap
*port_groups
)
3425 bool has_stateful
= has_stateful_acl(od
);
3427 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
3428 * default. A related rule at priority 1 is added below if there
3429 * are any stateful ACLs in this datapath. */
3430 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 0, "1", "next;");
3431 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 0, "1", "next;");
3434 /* Ingress and Egress ACL Table (Priority 1).
3436 * By default, traffic is allowed. This is partially handled by
3437 * the Priority 0 ACL flows added earlier, but we also need to
3438 * commit IP flows. This is because, while the initiater's
3439 * direction may not have any stateful rules, the server's may
3440 * and then its return traffic would not have an associated
3441 * conntrack entry and would return "+invalid".
3443 * We use "ct_commit" for a connection that is not already known
3444 * by the connection tracker. Once a connection is committed,
3445 * subsequent packets will hit the flow at priority 0 that just
3448 * We also check for established connections that have ct_label.blocked
3449 * set on them. That's a connection that was disallowed, but is
3450 * now allowed by policy again since it hit this default-allow flow.
3451 * We need to set ct_label.blocked=0 to let the connection continue,
3452 * which will be done by ct_commit() in the "stateful" stage.
3453 * Subsequent packets will hit the flow at priority 0 that just
3455 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 1,
3456 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3457 REGBIT_CONNTRACK_COMMIT
" = 1; next;");
3458 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 1,
3459 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3460 REGBIT_CONNTRACK_COMMIT
" = 1; next;");
3462 /* Ingress and Egress ACL Table (Priority 65535).
3464 * Always drop traffic that's in an invalid state. Also drop
3465 * reply direction packets for connections that have been marked
3466 * for deletion (bit 0 of ct_label is set).
3468 * This is enforced at a higher priority than ACLs can be defined. */
3469 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
3470 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3472 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
3473 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3476 /* Ingress and Egress ACL Table (Priority 65535).
3478 * Allow reply traffic that is part of an established
3479 * conntrack entry that has not been marked for deletion
3480 * (bit 0 of ct_label). We only match traffic in the
3481 * reply direction because we want traffic in the request
3482 * direction to hit the currently defined policy from ACLs.
3484 * This is enforced at a higher priority than ACLs can be defined. */
3485 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
3486 "ct.est && !ct.rel && !ct.new && !ct.inv "
3487 "&& ct.rpl && ct_label.blocked == 0",
3489 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
3490 "ct.est && !ct.rel && !ct.new && !ct.inv "
3491 "&& ct.rpl && ct_label.blocked == 0",
3494 /* Ingress and Egress ACL Table (Priority 65535).
3496 * Allow traffic that is related to an existing conntrack entry that
3497 * has not been marked for deletion (bit 0 of ct_label).
3499 * This is enforced at a higher priority than ACLs can be defined.
3501 * NOTE: This does not support related data sessions (eg,
3502 * a dynamically negotiated FTP data channel), but will allow
3503 * related traffic such as an ICMP Port Unreachable through
3504 * that's generated from a non-listening UDP port. */
3505 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
3506 "!ct.est && ct.rel && !ct.new && !ct.inv "
3507 "&& ct_label.blocked == 0",
3509 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
3510 "!ct.est && ct.rel && !ct.new && !ct.inv "
3511 "&& ct_label.blocked == 0",
3514 /* Ingress and Egress ACL Table (Priority 65535).
3516 * Not to do conntrack on ND packets. */
3517 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
, "nd", "next;");
3518 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
, "nd", "next;");
3521 /* Ingress or Egress ACL Table (Various priorities). */
3522 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
3523 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
3524 consider_acl(lflows
, od
, acl
, has_stateful
);
3526 struct ovn_port_group
*pg
;
3527 HMAP_FOR_EACH (pg
, key_node
, port_groups
) {
3528 if (ovn_port_group_ls_find(pg
, &od
->nbs
->header_
.uuid
)) {
3529 for (size_t i
= 0; i
< pg
->n_acls
; i
++) {
3530 consider_acl(lflows
, od
, pg
->acls
[i
], has_stateful
);
3535 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
3536 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3538 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
3539 if (od
->nbs
->ports
[i
]->dhcpv4_options
) {
3540 const char *server_id
= smap_get(
3541 &od
->nbs
->ports
[i
]->dhcpv4_options
->options
, "server_id");
3542 const char *server_mac
= smap_get(
3543 &od
->nbs
->ports
[i
]->dhcpv4_options
->options
, "server_mac");
3544 const char *lease_time
= smap_get(
3545 &od
->nbs
->ports
[i
]->dhcpv4_options
->options
, "lease_time");
3546 if (server_id
&& server_mac
&& lease_time
) {
3547 struct ds match
= DS_EMPTY_INITIALIZER
;
3548 const char *actions
=
3549 has_stateful
? "ct_commit; next;" : "next;";
3550 ds_put_format(&match
, "outport == \"%s\" && eth.src == %s "
3551 "&& ip4.src == %s && udp && udp.src == 67 "
3552 "&& udp.dst == 68", od
->nbs
->ports
[i
]->name
,
3553 server_mac
, server_id
);
3555 lflows
, od
, S_SWITCH_OUT_ACL
, 34000, ds_cstr(&match
),
3561 if (od
->nbs
->ports
[i
]->dhcpv6_options
) {
3562 const char *server_mac
= smap_get(
3563 &od
->nbs
->ports
[i
]->dhcpv6_options
->options
, "server_id");
3565 if (server_mac
&& eth_addr_from_string(server_mac
, &ea
)) {
3566 /* Get the link local IP of the DHCPv6 server from the
3568 struct in6_addr lla
;
3569 in6_generate_lla(ea
, &lla
);
3571 char server_ip
[INET6_ADDRSTRLEN
+ 1];
3572 ipv6_string_mapped(server_ip
, &lla
);
3574 struct ds match
= DS_EMPTY_INITIALIZER
;
3575 const char *actions
= has_stateful
? "ct_commit; next;" :
3577 ds_put_format(&match
, "outport == \"%s\" && eth.src == %s "
3578 "&& ip6.src == %s && udp && udp.src == 547 "
3579 "&& udp.dst == 546", od
->nbs
->ports
[i
]->name
,
3580 server_mac
, server_ip
);
3582 lflows
, od
, S_SWITCH_OUT_ACL
, 34000, ds_cstr(&match
),
3589 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3590 * if the CMS has configured DNS records for the datapath.
3592 if (ls_has_dns_records(od
->nbs
)) {
3593 const char *actions
= has_stateful
? "ct_commit; next;" : "next;";
3595 lflows
, od
, S_SWITCH_OUT_ACL
, 34000, "udp.src == 53",
3601 build_qos(struct ovn_datapath
*od
, struct hmap
*lflows
) {
3602 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_QOS_MARK
, 0, "1", "next;");
3603 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_QOS_MARK
, 0, "1", "next;");
3604 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_QOS_METER
, 0, "1", "next;");
3605 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_QOS_METER
, 0, "1", "next;");
3607 for (size_t i
= 0; i
< od
->nbs
->n_qos_rules
; i
++) {
3608 struct nbrec_qos
*qos
= od
->nbs
->qos_rules
[i
];
3609 bool ingress
= !strcmp(qos
->direction
, "from-lport") ? true :false;
3610 enum ovn_stage stage
= ingress
? S_SWITCH_IN_QOS_MARK
: S_SWITCH_OUT_QOS_MARK
;
3614 for (size_t j
= 0; j
< qos
->n_action
; j
++) {
3615 if (!strcmp(qos
->key_action
[j
], "dscp")) {
3616 struct ds dscp_action
= DS_EMPTY_INITIALIZER
;
3618 ds_put_format(&dscp_action
, "ip.dscp = %"PRId64
"; next;",
3619 qos
->value_action
[j
]);
3620 ovn_lflow_add(lflows
, od
, stage
,
3622 qos
->match
, ds_cstr(&dscp_action
));
3623 ds_destroy(&dscp_action
);
3627 for (size_t n
= 0; n
< qos
->n_bandwidth
; n
++) {
3628 if (!strcmp(qos
->key_bandwidth
[n
], "rate")) {
3629 rate
= qos
->value_bandwidth
[n
];
3630 } else if (!strcmp(qos
->key_bandwidth
[n
], "burst")) {
3631 burst
= qos
->value_bandwidth
[n
];
3635 struct ds meter_action
= DS_EMPTY_INITIALIZER
;
3636 stage
= ingress
? S_SWITCH_IN_QOS_METER
: S_SWITCH_OUT_QOS_METER
;
3638 ds_put_format(&meter_action
,
3639 "set_meter(%"PRId64
", %"PRId64
"); next;",
3642 ds_put_format(&meter_action
,
3643 "set_meter(%"PRId64
"); next;",
3647 /* Ingress and Egress QoS Meter Table.
3649 * We limit the bandwidth of this flow by adding a meter table.
3651 ovn_lflow_add(lflows
, od
, stage
,
3653 qos
->match
, ds_cstr(&meter_action
));
3654 ds_destroy(&meter_action
);
3660 build_lb(struct ovn_datapath
*od
, struct hmap
*lflows
)
3662 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3664 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_LB
, 0, "1", "next;");
3665 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_LB
, 0, "1", "next;");
3667 if (od
->nbs
->load_balancer
) {
3668 /* Ingress and Egress LB Table (Priority 65535).
3670 * Send established traffic through conntrack for just NAT. */
3671 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_LB
, UINT16_MAX
,
3672 "ct.est && !ct.rel && !ct.new && !ct.inv",
3673 REGBIT_CONNTRACK_NAT
" = 1; next;");
3674 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_LB
, UINT16_MAX
,
3675 "ct.est && !ct.rel && !ct.new && !ct.inv",
3676 REGBIT_CONNTRACK_NAT
" = 1; next;");
3681 build_stateful(struct ovn_datapath
*od
, struct hmap
*lflows
)
3683 /* Ingress and Egress stateful Table (Priority 0): Packets are
3684 * allowed by default. */
3685 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
, 0, "1", "next;");
3686 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_STATEFUL
, 0, "1", "next;");
3688 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3689 * committed to conntrack. We always set ct_label.blocked to 0 here as
3690 * any packet that makes it this far is part of a connection we
3691 * want to allow to continue. */
3692 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
, 100,
3693 REGBIT_CONNTRACK_COMMIT
" == 1", "ct_commit(ct_label=0/1); next;");
3694 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_STATEFUL
, 100,
3695 REGBIT_CONNTRACK_COMMIT
" == 1", "ct_commit(ct_label=0/1); next;");
3697 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3698 * through nat (without committing).
3700 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3701 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3704 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
, 100,
3705 REGBIT_CONNTRACK_NAT
" == 1", "ct_lb;");
3706 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_STATEFUL
, 100,
3707 REGBIT_CONNTRACK_NAT
" == 1", "ct_lb;");
3709 /* Load balancing rules for new connections get committed to conntrack
3710 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3711 * a higher priority rule for load balancing below also commits the
3712 * connection, so it is okay if we do not hit the above match on
3713 * REGBIT_CONNTRACK_COMMIT. */
3714 for (int i
= 0; i
< od
->nbs
->n_load_balancer
; i
++) {
3715 struct nbrec_load_balancer
*lb
= od
->nbs
->load_balancer
[i
];
3716 struct smap
*vips
= &lb
->vips
;
3717 struct smap_node
*node
;
3719 SMAP_FOR_EACH (node
, vips
) {
3723 /* node->key contains IP:port or just IP. */
3724 char *ip_address
= NULL
;
3725 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
3731 /* New connections in Ingress table. */
3732 char *action
= xasprintf("ct_lb(%s);", node
->value
);
3733 struct ds match
= DS_EMPTY_INITIALIZER
;
3734 if (addr_family
== AF_INET
) {
3735 ds_put_format(&match
, "ct.new && ip4.dst == %s", ip_address
);
3737 ds_put_format(&match
, "ct.new && ip6.dst == %s", ip_address
);
3740 if (lb
->protocol
&& !strcmp(lb
->protocol
, "udp")) {
3741 ds_put_format(&match
, " && udp.dst == %d", port
);
3743 ds_put_format(&match
, " && tcp.dst == %d", port
);
3745 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
,
3746 120, ds_cstr(&match
), action
);
3748 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
,
3749 110, ds_cstr(&match
), action
);
3760 build_lswitch_flows(struct hmap
*datapaths
, struct hmap
*ports
,
3761 struct hmap
*port_groups
, struct hmap
*lflows
,
3762 struct hmap
*mcgroups
)
3764 /* This flow table structure is documented in ovn-northd(8), so please
3765 * update ovn-northd.8.xml if you change anything. */
3767 struct ds match
= DS_EMPTY_INITIALIZER
;
3768 struct ds actions
= DS_EMPTY_INITIALIZER
;
3770 /* Build pre-ACL and ACL tables for both ingress and egress.
3771 * Ingress tables 3 through 10. Egress tables 0 through 7. */
3772 struct ovn_datapath
*od
;
3773 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3778 build_pre_acls(od
, lflows
);
3779 build_pre_lb(od
, lflows
);
3780 build_pre_stateful(od
, lflows
);
3781 build_acls(od
, lflows
, port_groups
);
3782 build_qos(od
, lflows
);
3783 build_lb(od
, lflows
);
3784 build_stateful(od
, lflows
);
3787 /* Logical switch ingress table 0: Admission control framework (priority
3789 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3794 /* Logical VLANs not supported. */
3795 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "vlan.present",
3798 /* Broadcast/multicast source address is invalid. */
3799 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "eth.src[40]",
3802 /* Port security flows have priority 50 (see below) and will continue
3803 * to the next table if packet source is acceptable. */
3806 /* Logical switch ingress table 0: Ingress port security - L2
3808 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
3809 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
3811 struct ovn_port
*op
;
3812 HMAP_FOR_EACH (op
, key_node
, ports
) {
3817 if (!lsp_is_enabled(op
->nbsp
)) {
3818 /* Drop packets from disabled logical ports (since logical flow
3819 * tables are default-drop). */
3825 ds_put_format(&match
, "inport == %s", op
->json_key
);
3826 build_port_security_l2("eth.src", op
->ps_addrs
, op
->n_ps_addrs
,
3829 const char *queue_id
= smap_get(&op
->sb
->options
, "qdisc_queue_id");
3831 ds_put_format(&actions
, "set_queue(%s); ", queue_id
);
3833 ds_put_cstr(&actions
, "next;");
3834 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_L2
, 50,
3835 ds_cstr(&match
), ds_cstr(&actions
));
3837 if (op
->nbsp
->n_port_security
) {
3838 build_port_security_ip(P_IN
, op
, lflows
);
3839 build_port_security_nd(op
, lflows
);
3843 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
3845 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3850 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_ND
, 0, "1", "next;");
3851 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_IP
, 0, "1", "next;");
3854 /* Ingress table 11: ARP/ND responder, skip requests coming from localnet
3855 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
3857 HMAP_FOR_EACH (op
, key_node
, ports
) {
3862 if ((!strcmp(op
->nbsp
->type
, "localnet")) ||
3863 (!strcmp(op
->nbsp
->type
, "vtep"))) {
3865 ds_put_format(&match
, "inport == %s", op
->json_key
);
3866 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 100,
3867 ds_cstr(&match
), "next;");
3871 /* Ingress table 11: ARP/ND responder, reply for known IPs.
3873 HMAP_FOR_EACH (op
, key_node
, ports
) {
3879 * Add ARP/ND reply flows if either the
3881 * - port type is router or
3882 * - port type is localport
3884 if (!lsp_is_up(op
->nbsp
) && strcmp(op
->nbsp
->type
, "router") &&
3885 strcmp(op
->nbsp
->type
, "localport")) {
3889 for (size_t i
= 0; i
< op
->n_lsp_addrs
; i
++) {
3890 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv4_addrs
; j
++) {
3892 ds_put_format(&match
, "arp.tpa == %s && arp.op == 1",
3893 op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr_s
);
3895 ds_put_format(&actions
,
3896 "eth.dst = eth.src; "
3898 "arp.op = 2; /* ARP reply */ "
3899 "arp.tha = arp.sha; "
3901 "arp.tpa = arp.spa; "
3903 "outport = inport; "
3904 "flags.loopback = 1; "
3906 op
->lsp_addrs
[i
].ea_s
, op
->lsp_addrs
[i
].ea_s
,
3907 op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr_s
);
3908 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 50,
3909 ds_cstr(&match
), ds_cstr(&actions
));
3911 /* Do not reply to an ARP request from the port that owns the
3912 * address (otherwise a DHCP client that ARPs to check for a
3913 * duplicate address will fail). Instead, forward it the usual
3916 * (Another alternative would be to simply drop the packet. If
3917 * everything is working as it is configured, then this would
3918 * produce equivalent results, since no one should reply to the
3919 * request. But ARPing for one's own IP address is intended to
3920 * detect situations where the network is not working as
3921 * configured, so dropping the request would frustrate that
3923 ds_put_format(&match
, " && inport == %s", op
->json_key
);
3924 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 100,
3925 ds_cstr(&match
), "next;");
3928 /* For ND solicitations, we need to listen for both the
3929 * unicast IPv6 address and its all-nodes multicast address,
3930 * but always respond with the unicast IPv6 address. */
3931 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv6_addrs
; j
++) {
3933 ds_put_format(&match
,
3934 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
3935 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
,
3936 op
->lsp_addrs
[i
].ipv6_addrs
[j
].sn_addr_s
,
3937 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
);
3940 ds_put_format(&actions
,
3946 "outport = inport; "
3947 "flags.loopback = 1; "
3950 op
->lsp_addrs
[i
].ea_s
,
3951 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
,
3952 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
,
3953 op
->lsp_addrs
[i
].ea_s
);
3954 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 50,
3955 ds_cstr(&match
), ds_cstr(&actions
));
3957 /* Do not reply to a solicitation from the port that owns the
3958 * address (otherwise DAD detection will fail). */
3959 ds_put_format(&match
, " && inport == %s", op
->json_key
);
3960 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 100,
3961 ds_cstr(&match
), "next;");
3966 /* Ingress table 11: ARP/ND responder, by default goto next.
3968 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3973 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ARP_ND_RSP
, 0, "1", "next;");
3976 /* Logical switch ingress table 12 and 13: DHCP options and response
3977 * priority 100 flows. */
3978 HMAP_FOR_EACH (op
, key_node
, ports
) {
3983 if (!lsp_is_enabled(op
->nbsp
) || !strcmp(op
->nbsp
->type
, "router")) {
3984 /* Don't add the DHCP flows if the port is not enabled or if the
3985 * port is a router port. */
3989 if (!op
->nbsp
->dhcpv4_options
&& !op
->nbsp
->dhcpv6_options
) {
3990 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
3995 for (size_t i
= 0; i
< op
->n_lsp_addrs
; i
++) {
3996 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv4_addrs
; j
++) {
3997 struct ds options_action
= DS_EMPTY_INITIALIZER
;
3998 struct ds response_action
= DS_EMPTY_INITIALIZER
;
3999 struct ds ipv4_addr_match
= DS_EMPTY_INITIALIZER
;
4000 if (build_dhcpv4_action(
4001 op
, op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr
,
4002 &options_action
, &response_action
, &ipv4_addr_match
)) {
4005 &match
, "inport == %s && eth.src == %s && "
4006 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
4007 "udp.src == 68 && udp.dst == 67", op
->json_key
,
4008 op
->lsp_addrs
[i
].ea_s
);
4010 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_OPTIONS
,
4011 100, ds_cstr(&match
),
4012 ds_cstr(&options_action
));
4014 /* Allow ip4.src = OFFER_IP and
4015 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
4017 * - When the client wants to renew the IP by sending
4018 * the DHCPREQUEST to the server ip.
4019 * - When the client wants to renew the IP by
4020 * broadcasting the DHCPREQUEST.
4023 &match
, "inport == %s && eth.src == %s && "
4024 "%s && udp.src == 68 && udp.dst == 67", op
->json_key
,
4025 op
->lsp_addrs
[i
].ea_s
, ds_cstr(&ipv4_addr_match
));
4027 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_OPTIONS
,
4028 100, ds_cstr(&match
),
4029 ds_cstr(&options_action
));
4032 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
4033 * put_dhcp_opts action is successful. */
4035 &match
, "inport == %s && eth.src == %s && "
4036 "ip4 && udp.src == 68 && udp.dst == 67"
4037 " && "REGBIT_DHCP_OPTS_RESULT
, op
->json_key
,
4038 op
->lsp_addrs
[i
].ea_s
);
4039 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_RESPONSE
,
4040 100, ds_cstr(&match
),
4041 ds_cstr(&response_action
));
4042 ds_destroy(&options_action
);
4043 ds_destroy(&response_action
);
4044 ds_destroy(&ipv4_addr_match
);
4049 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv6_addrs
; j
++) {
4050 struct ds options_action
= DS_EMPTY_INITIALIZER
;
4051 struct ds response_action
= DS_EMPTY_INITIALIZER
;
4052 if (build_dhcpv6_action(
4053 op
, &op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr
,
4054 &options_action
, &response_action
)) {
4057 &match
, "inport == %s && eth.src == %s"
4058 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
4059 " udp.dst == 547", op
->json_key
,
4060 op
->lsp_addrs
[i
].ea_s
);
4062 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_OPTIONS
, 100,
4063 ds_cstr(&match
), ds_cstr(&options_action
));
4065 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
4066 * put_dhcpv6_opts action is successful */
4067 ds_put_cstr(&match
, " && "REGBIT_DHCP_OPTS_RESULT
);
4068 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_RESPONSE
, 100,
4069 ds_cstr(&match
), ds_cstr(&response_action
));
4070 ds_destroy(&options_action
);
4071 ds_destroy(&response_action
);
4078 /* Logical switch ingress table 14 and 15: DNS lookup and response
4079 * priority 100 flows.
4081 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4082 if (!od
->nbs
|| !ls_has_dns_records(od
->nbs
)) {
4086 struct ds action
= DS_EMPTY_INITIALIZER
;
4089 ds_put_cstr(&match
, "udp.dst == 53");
4090 ds_put_format(&action
,
4091 REGBIT_DNS_LOOKUP_RESULT
" = dns_lookup(); next;");
4092 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_LOOKUP
, 100,
4093 ds_cstr(&match
), ds_cstr(&action
));
4095 ds_put_cstr(&match
, " && "REGBIT_DNS_LOOKUP_RESULT
);
4096 ds_put_format(&action
, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
4097 "udp.dst = udp.src; udp.src = 53; outport = inport; "
4098 "flags.loopback = 1; output;");
4099 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_RESPONSE
, 100,
4100 ds_cstr(&match
), ds_cstr(&action
));
4102 ds_put_format(&action
, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
4103 "udp.dst = udp.src; udp.src = 53; outport = inport; "
4104 "flags.loopback = 1; output;");
4105 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_RESPONSE
, 100,
4106 ds_cstr(&match
), ds_cstr(&action
));
4107 ds_destroy(&action
);
4110 /* Ingress table 12 and 13: DHCP options and response, by default goto
4111 * next. (priority 0).
4112 * Ingress table 14 and 15: DNS lookup and response, by default goto next.
4115 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4120 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DHCP_OPTIONS
, 0, "1", "next;");
4121 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DHCP_RESPONSE
, 0, "1", "next;");
4122 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_LOOKUP
, 0, "1", "next;");
4123 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_RESPONSE
, 0, "1", "next;");
4126 /* Ingress table 16: Destination lookup, broadcast and multicast handling
4127 * (priority 100). */
4128 HMAP_FOR_EACH (op
, key_node
, ports
) {
4133 if (lsp_is_enabled(op
->nbsp
)) {
4134 ovn_multicast_add(mcgroups
, &mc_flood
, op
);
4137 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4142 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 100, "eth.mcast",
4143 "outport = \""MC_FLOOD
"\"; output;");
4146 /* Ingress table 16: Destination lookup, unicast handling (priority 50), */
4147 HMAP_FOR_EACH (op
, key_node
, ports
) {
4152 for (size_t i
= 0; i
< op
->nbsp
->n_addresses
; i
++) {
4153 /* Addresses are owned by the logical port.
4154 * Ethernet address followed by zero or more IPv4
4155 * or IPv6 addresses (or both). */
4156 struct eth_addr mac
;
4157 if (ovs_scan(op
->nbsp
->addresses
[i
],
4158 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(mac
))) {
4160 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
4161 ETH_ADDR_ARGS(mac
));
4164 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
4165 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
4166 ds_cstr(&match
), ds_cstr(&actions
));
4167 } else if (!strcmp(op
->nbsp
->addresses
[i
], "unknown")) {
4168 if (lsp_is_enabled(op
->nbsp
)) {
4169 ovn_multicast_add(mcgroups
, &mc_unknown
, op
);
4170 op
->od
->has_unknown
= true;
4172 } else if (is_dynamic_lsp_address(op
->nbsp
->addresses
[i
])) {
4173 if (!op
->nbsp
->dynamic_addresses
4174 || !ovs_scan(op
->nbsp
->dynamic_addresses
,
4175 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(mac
))) {
4179 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
4180 ETH_ADDR_ARGS(mac
));
4183 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
4184 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
4185 ds_cstr(&match
), ds_cstr(&actions
));
4186 } else if (!strcmp(op
->nbsp
->addresses
[i
], "router")) {
4187 if (!op
->peer
|| !op
->peer
->nbrp
4188 || !ovs_scan(op
->peer
->nbrp
->mac
,
4189 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(mac
))) {
4193 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
4194 ETH_ADDR_ARGS(mac
));
4195 if (op
->peer
->od
->l3dgw_port
4196 && op
->peer
== op
->peer
->od
->l3dgw_port
4197 && op
->peer
->od
->l3redirect_port
) {
4198 /* The destination lookup flow for the router's
4199 * distributed gateway port MAC address should only be
4200 * programmed on the "redirect-chassis". */
4201 ds_put_format(&match
, " && is_chassis_resident(%s)",
4202 op
->peer
->od
->l3redirect_port
->json_key
);
4206 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
4207 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
4208 ds_cstr(&match
), ds_cstr(&actions
));
4210 /* Add ethernet addresses specified in NAT rules on
4211 * distributed logical routers. */
4212 if (op
->peer
->od
->l3dgw_port
4213 && op
->peer
== op
->peer
->od
->l3dgw_port
) {
4214 for (int j
= 0; j
< op
->peer
->od
->nbr
->n_nat
; j
++) {
4215 const struct nbrec_nat
*nat
4216 = op
->peer
->od
->nbr
->nat
[j
];
4217 if (!strcmp(nat
->type
, "dnat_and_snat")
4218 && nat
->logical_port
&& nat
->external_mac
4219 && eth_addr_from_string(nat
->external_mac
, &mac
)) {
4222 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
4223 " && is_chassis_resident(\"%s\")",
4228 ds_put_format(&actions
, "outport = %s; output;",
4230 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
,
4231 50, ds_cstr(&match
),
4237 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
4240 "%s: invalid syntax '%s' in addresses column",
4241 op
->nbsp
->name
, op
->nbsp
->addresses
[i
]);
4246 /* Ingress table 16: Destination lookup for unknown MACs (priority 0). */
4247 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4252 if (od
->has_unknown
) {
4253 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 0, "1",
4254 "outport = \""MC_UNKNOWN
"\"; output;");
4258 /* Egress tables 8: Egress port security - IP (priority 0)
4259 * Egress table 9: Egress port security L2 - multicast/broadcast
4260 * (priority 100). */
4261 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4266 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_IP
, 0, "1", "next;");
4267 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_L2
, 100, "eth.mcast",
4271 /* Egress table 8: Egress port security - IP (priorities 90 and 80)
4272 * if port security enabled.
4274 * Egress table 9: Egress port security - L2 (priorities 50 and 150).
4276 * Priority 50 rules implement port security for enabled logical port.
4278 * Priority 150 rules drop packets to disabled logical ports, so that they
4279 * don't even receive multicast or broadcast packets. */
4280 HMAP_FOR_EACH (op
, key_node
, ports
) {
4286 ds_put_format(&match
, "outport == %s", op
->json_key
);
4287 if (lsp_is_enabled(op
->nbsp
)) {
4288 build_port_security_l2("eth.dst", op
->ps_addrs
, op
->n_ps_addrs
,
4290 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 50,
4291 ds_cstr(&match
), "output;");
4293 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 150,
4294 ds_cstr(&match
), "drop;");
4297 if (op
->nbsp
->n_port_security
) {
4298 build_port_security_ip(P_OUT
, op
, lflows
);
4303 ds_destroy(&actions
);
/* Returns true if the logical router port 'lrport' is administratively
 * enabled.  A NULL (unset) "enabled" column defaults to enabled. */
4307 lrport_is_enabled(const struct nbrec_logical_router_port
*lrport
)
4309 return !lrport
->enabled
|| *lrport
->enabled
;
4312 /* Returns a string of the IP address of the router port 'op' that
4313 * overlaps with 'ip_s". If one is not found, returns NULL.
4315 * The caller must not free the returned string. */
4317 find_lrp_member_ip(const struct ovn_port
*op
, const char *ip_s
)
/* Address family is inferred from the presence of a '.' in 'ip_s'. */
4319 bool is_ipv4
= strchr(ip_s
, '.') ? true : false;
/* IPv4 path: parse 'ip_s', then scan the port's IPv4 networks for a
 * subnet that contains it ((network ^ ip) & mask == 0). */
4324 if (!ip_parse(ip_s
, &ip
)) {
4325 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4326 VLOG_WARN_RL(&rl
, "bad ip address %s", ip_s
);
4330 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4331 const struct ipv4_netaddr
*na
= &op
->lrp_networks
.ipv4_addrs
[i
];
4333 if (!((na
->network
^ ip
) & na
->mask
)) {
4334 /* There should be only 1 interface that matches the
4335 * supplied IP. Otherwise, it's a configuration error,
4336 * because subnets of a router's interfaces should NOT
/* IPv6 path: same subnet-containment test, expressed with the
 * ipv6_addr_bitxor/bitand helpers (xor then mask must be all-zero). */
4342 struct in6_addr ip6
;
4344 if (!ipv6_parse(ip_s
, &ip6
)) {
4345 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4346 VLOG_WARN_RL(&rl
, "bad ipv6 address %s", ip_s
);
4350 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
4351 const struct ipv6_netaddr
*na
= &op
->lrp_networks
.ipv6_addrs
[i
];
4352 struct in6_addr xor_addr
= ipv6_addr_bitxor(&na
->network
, &ip6
);
4353 struct in6_addr and_addr
= ipv6_addr_bitand(&xor_addr
, &na
->mask
);
4355 if (ipv6_is_zero(&and_addr
)) {
4356 /* There should be only 1 interface that matches the
4357 * supplied IP. Otherwise, it's a configuration error,
4358 * because subnets of a router's interfaces should NOT
/* Adds one routing flow to the S_ROUTER_IN_IP_ROUTING stage of 'op''s
 * datapath: match on 'network_s'/'plen' (against ip.src for "src-ip"
 * policy, otherwise ip.dst), decrement TTL, store the next hop
 * ('gateway' if given, else the packet's own destination) in (xx)reg0,
 * and set outport/eth.src from 'op'. */
4369 add_route(struct hmap
*lflows
, const struct ovn_port
*op
,
4370 const char *lrp_addr_s
, const char *network_s
, int plen
,
4371 const char *gateway
, const char *policy
)
4373 bool is_ipv4
= strchr(network_s
, '.') ? true : false;
4374 struct ds match
= DS_EMPTY_INITIALIZER
;
/* Longest-prefix-match ordering: priority grows with prefix length,
 * and a dst-ip route (2*plen + 1) beats a src-ip route (2*plen) of
 * the same length. */
4378 if (policy
&& !strcmp(policy
, "src-ip")) {
4380 priority
= plen
* 2;
4383 priority
= (plen
* 2) + 1;
4386 /* IPv6 link-local addresses must be scoped to the local router port. */
4388 struct in6_addr network
;
4389 ovs_assert(ipv6_parse(network_s
, &network
));
4390 if (in6_is_lla(&network
)) {
4391 ds_put_format(&match
, "inport == %s && ", op
->json_key
);
4394 ds_put_format(&match
, "ip%s.%s == %s/%d", is_ipv4
? "4" : "6", dir
,
4397 struct ds actions
= DS_EMPTY_INITIALIZER
;
/* IPv6 uses the 128-bit "xxreg0" register; IPv4 the 32-bit "reg0". */
4398 ds_put_format(&actions
, "ip.ttl--; %sreg0 = ", is_ipv4
? "" : "xx");
4401 ds_put_cstr(&actions
, gateway
);
4403 ds_put_format(&actions
, "ip%s.dst", is_ipv4
? "4" : "6");
4405 ds_put_format(&actions
, "; "
4409 "flags.loopback = 1; "
4411 is_ipv4
? "" : "xx",
4413 op
->lrp_networks
.ea_s
,
4416 /* The priority here is calculated to implement longest-prefix-match
4418 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_ROUTING
, priority
,
4419 ds_cstr(&match
), ds_cstr(&actions
));
4421 ds_destroy(&actions
);
/* Translates one Northbound static route into a logical-router flow by
 * way of add_route().  Validates 'route->nexthop' (must be a host
 * address, all-ones mask) and 'route->ip_prefix', then resolves the
 * output port: either the explicitly configured route->output_port, or
 * the first router port whose subnet contains the next hop.  Logs a
 * rate-limited warning and installs nothing if no path is found. */
4425 build_static_route_flow(struct hmap
*lflows
, struct ovn_datapath
*od
,
4427 const struct nbrec_logical_router_static_route
*route
)
4430 const char *lrp_addr_s
= NULL
;
4434 /* Verify that the next hop is an IP address with an all-ones mask. */
4435 char *error
= ip_parse_cidr(route
->nexthop
, &nexthop
, &plen
);
4438 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4439 VLOG_WARN_RL(&rl
, "bad next hop mask %s", route
->nexthop
);
/* Not IPv4 -- retry the next hop as an IPv6 host address. */
4446 struct in6_addr ip6
;
4447 error
= ipv6_parse_cidr(route
->nexthop
, &ip6
, &plen
);
4450 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4451 VLOG_WARN_RL(&rl
, "bad next hop mask %s", route
->nexthop
);
4456 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4457 VLOG_WARN_RL(&rl
, "bad next hop ip address %s", route
->nexthop
);
4466 /* Verify that ip prefix is a valid IPv4 address. */
4467 error
= ip_parse_cidr(route
->ip_prefix
, &prefix
, &plen
);
4469 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4470 VLOG_WARN_RL(&rl
, "bad 'ip_prefix' in static routes %s",
/* Canonicalize: mask off host bits so the match uses the network
 * address proper. */
4475 prefix_s
= xasprintf(IP_FMT
, IP_ARGS(prefix
& be32_prefix_mask(plen
)));
4477 /* Verify that ip prefix is a valid IPv6 address. */
4478 struct in6_addr prefix
;
4479 error
= ipv6_parse_cidr(route
->ip_prefix
, &prefix
, &plen
);
4481 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4482 VLOG_WARN_RL(&rl
, "bad 'ip_prefix' in static routes %s",
4487 struct in6_addr mask
= ipv6_create_mask(plen
);
4488 struct in6_addr network
= ipv6_addr_bitand(&prefix
, &mask
);
4489 prefix_s
= xmalloc(INET6_ADDRSTRLEN
);
4490 inet_ntop(AF_INET6
, &network
, prefix_s
, INET6_ADDRSTRLEN
);
4493 /* Find the outgoing port. */
4494 struct ovn_port
*out_port
= NULL
;
4495 if (route
->output_port
) {
4496 out_port
= ovn_port_find(ports
, route
->output_port
);
4498 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4499 VLOG_WARN_RL(&rl
, "Bad out port %s for static route %s",
4500 route
->output_port
, route
->ip_prefix
);
4503 lrp_addr_s
= find_lrp_member_ip(out_port
, route
->nexthop
);
4505 /* There are no IP networks configured on the router's port via
4506 * which 'route->nexthop' is theoretically reachable. But since
4507 * 'out_port' has been specified, we honor it by trying to reach
4508 * 'route->nexthop' via the first IP address of 'out_port'.
4509 * (There are cases, e.g in GCE, where each VM gets a /32 IP
4510 * address and the default gateway is still reachable from it.) */
4512 if (out_port
->lrp_networks
.n_ipv4_addrs
) {
4513 lrp_addr_s
= out_port
->lrp_networks
.ipv4_addrs
[0].addr_s
;
4516 if (out_port
->lrp_networks
.n_ipv6_addrs
) {
4517 lrp_addr_s
= out_port
->lrp_networks
.ipv6_addrs
[0].addr_s
;
4522 /* output_port is not specified, find the
4523 * router port matching the next hop. */
4525 for (i
= 0; i
< od
->nbr
->n_ports
; i
++) {
4526 struct nbrec_logical_router_port
*lrp
= od
->nbr
->ports
[i
];
4527 out_port
= ovn_port_find(ports
, lrp
->name
);
4529 /* This should not happen. */
4533 lrp_addr_s
= find_lrp_member_ip(out_port
, route
->nexthop
);
4540 if (!out_port
|| !lrp_addr_s
) {
4541 /* There is no matched out port. */
4542 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4543 VLOG_WARN_RL(&rl
, "No path for static route %s; next hop %s",
4544 route
->ip_prefix
, route
->nexthop
);
/* An unset policy column defaults to destination-based routing. */
4548 char *policy
= route
->policy
? route
->policy
: "dst-ip";
4549 add_route(lflows
, out_port
, lrp_addr_s
, prefix_s
, plen
, route
->nexthop
,
/* Appends router port 'op''s IPv4 addresses to 'ds'.  With exactly one
 * address and !add_bcast the bare address is emitted; otherwise a
 * {}-enclosed comma-separated list, additionally including each
 * subnet's broadcast address when 'add_bcast' is set. */
4557 op_put_v4_networks(struct ds
*ds
, const struct ovn_port
*op
, bool add_bcast
)
4559 if (!add_bcast
&& op
->lrp_networks
.n_ipv4_addrs
== 1) {
4560 ds_put_format(ds
, "%s", op
->lrp_networks
.ipv4_addrs
[0].addr_s
);
4564 ds_put_cstr(ds
, "{");
4565 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4566 ds_put_format(ds
, "%s, ", op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
4568 ds_put_format(ds
, "%s, ", op
->lrp_networks
.ipv4_addrs
[i
].bcast_s
);
4573 ds_put_cstr(ds
, "}");
/* Appends router port 'op''s IPv6 addresses to 'ds': bare when there is
 * exactly one, otherwise a {}-enclosed comma-separated list.  IPv6
 * counterpart of op_put_v4_networks() (no broadcast variant). */
4577 op_put_v6_networks(struct ds
*ds
, const struct ovn_port
*op
)
4579 if (op
->lrp_networks
.n_ipv6_addrs
== 1) {
4580 ds_put_format(ds
, "%s", op
->lrp_networks
.ipv6_addrs
[0].addr_s
);
4584 ds_put_cstr(ds
, "{");
4585 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
4586 ds_put_format(ds
, "%s, ", op
->lrp_networks
.ipv6_addrs
[i
].addr_s
);
4590 ds_put_cstr(ds
, "}");
/* Looks up the "<key_type>_force_snat_ip" key (key_type is "dnat" or
 * "lb" at the call sites in this file) in the router's NB options and
 * parses it into '*ip'.  The value must be a plain host address
 * (all-ones mask); otherwise a rate-limited warning is logged.
 * NOTE(review): the return statements are not visible in this chunk --
 * callers treat the result as the configured address string, using it
 * as a found/not-found flag; confirm against the full source. */
4594 get_force_snat_ip(struct ovn_datapath
*od
, const char *key_type
, ovs_be32
*ip
)
4596 char *key
= xasprintf("%s_force_snat_ip", key_type
);
4597 const char *ip_address
= smap_get(&od
->nbr
->options
, key
);
4602 char *error
= ip_parse_masked(ip_address
, ip
, &mask
);
4603 if (error
|| mask
!= OVS_BE32_MAX
) {
4604 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4605 VLOG_WARN_RL(&rl
, "bad ip %s in options of router "UUID_FMT
"",
4606 ip_address
, UUID_ARGS(&od
->key
));
/* Installs the load-balancer flows for one VIP on logical router 'od':
 * two S_ROUTER_IN_DNAT flows at 'priority' ("ct.new && <match>" and
 * "ct.est && <match>"), each prefixed with flags.force_snat_for_lb = 1
 * when 'lb_force_snat_ip' is configured.  For routers with a
 * distributed gateway port and IPv4 backends it additionally installs a
 * priority-120 S_ROUTER_OUT_UNDNAT flow that un-DNATs reverse traffic
 * from the backends ('backend_ips' is a comma-separated "ip[:port]"
 * list) on the gateway chassis. */
4619 add_router_lb_flow(struct hmap
*lflows
, struct ovn_datapath
*od
,
4620 struct ds
*match
, struct ds
*actions
, int priority
,
4621 const char *lb_force_snat_ip
, char *backend_ips
,
4622 bool is_udp
, int addr_family
)
4624 /* A match and actions for new connections. */
4625 char *new_match
= xasprintf("ct.new && %s", ds_cstr(match
));
4626 if (lb_force_snat_ip
) {
4627 char *new_actions
= xasprintf("flags.force_snat_for_lb = 1; %s",
4629 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, new_match
,
4633 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, new_match
,
4637 /* A match and actions for established connections. */
4638 char *est_match
= xasprintf("ct.est && %s", ds_cstr(match
));
4639 if (lb_force_snat_ip
) {
4640 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, est_match
,
4641 "flags.force_snat_for_lb = 1; ct_dnat;");
4643 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, est_match
,
/* The UNDNAT flow below applies only to distributed routers with a
 * gateway port, a redirect port, and IPv4 backends. */
4650 if (!od
->l3dgw_port
|| !od
->l3redirect_port
|| !backend_ips
4651 || addr_family
!= AF_INET
) {
4655 /* Add logical flows to UNDNAT the load balanced reverse traffic in
4656 * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical
4657 * router has a gateway router port associated.
4659 struct ds undnat_match
= DS_EMPTY_INITIALIZER
;
4660 ds_put_cstr(&undnat_match
, "ip4 && (");
4661 char *start
, *next
, *ip_str
;
4662 start
= next
= xstrdup(backend_ips
);
4663 ip_str
= strsep(&next
, ",");
4664 bool backend_ips_found
= false;
/* One "(ip4.src == <ip> [&& <proto>.src == <port>]) || " clause per
 * backend. */
4665 while (ip_str
&& ip_str
[0]) {
4666 char *ip_address
= NULL
;
4669 ip_address_and_port_from_lb_key(ip_str
, &ip_address
, &port
,
4675 ds_put_format(&undnat_match
, "(ip4.src == %s", ip_address
);
4678 ds_put_format(&undnat_match
, " && %s.src == %d) || ",
4679 is_udp
? "udp" : "tcp", port
);
4681 ds_put_cstr(&undnat_match
, ") || ");
4683 ip_str
= strsep(&next
, ",");
4684 backend_ips_found
= true;
4688 if (!backend_ips_found
) {
4689 ds_destroy(&undnat_match
);
/* Strip the trailing " || " left by the loop above. */
4692 ds_chomp(&undnat_match
, ' ');
4693 ds_chomp(&undnat_match
, '|');
4694 ds_chomp(&undnat_match
, '|');
4695 ds_chomp(&undnat_match
, ' ');
4696 ds_put_format(&undnat_match
, ") && outport == %s && "
4697 "is_chassis_resident(%s)", od
->l3dgw_port
->json_key
,
4698 od
->l3redirect_port
->json_key
);
4699 if (lb_force_snat_ip
) {
4700 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 120,
4701 ds_cstr(&undnat_match
),
4702 "flags.force_snat_for_lb = 1; ct_dnat;");
4704 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 120,
4705 ds_cstr(&undnat_match
), "ct_dnat;");
4708 ds_destroy(&undnat_match
);
/* Clamping bounds (in seconds) for IPv6 Router Advertisement intervals,
 * matching RFC 4861 section 6.2.1: MaxRtrAdvInterval must be within
 * [4, 1800] and MinRtrAdvInterval within [3, 0.75 * MaxRtrAdvInterval].
 * Used by copy_ra_to_sb() below. */
4711 #define ND_RA_MAX_INTERVAL_MAX 1800
4712 #define ND_RA_MAX_INTERVAL_MIN 4
4714 #define ND_RA_MIN_INTERVAL_MAX(max) ((max) * 3 / 4)
4715 #define ND_RA_MIN_INTERVAL_MIN 3
/* Copies router port 'op''s IPv6 Router Advertisement configuration
 * from the Northbound ipv6_ra_configs column into the Southbound
 * Port_Binding options (read by ovn-controller): enables periodic RAs,
 * clamps max/min intervals to the ND_RA_* bounds, forwards the MTU only
 * when it meets the 1280-byte IPv6 minimum, advertises the port's
 * non-link-local prefixes, and uses the link-local address and port MAC
 * as the RA source. */
4718 copy_ra_to_sb(struct ovn_port
*op
, const char *address_mode
)
4720 struct smap options
;
4721 smap_clone(&options
, &op
->sb
->options
);
4723 smap_add(&options
, "ipv6_ra_send_periodic", "true");
4724 smap_add(&options
, "ipv6_ra_address_mode", address_mode
);
/* Clamp max_interval into [ND_RA_MAX_INTERVAL_MIN, _MAX]. */
4726 int max_interval
= smap_get_int(&op
->nbrp
->ipv6_ra_configs
,
4727 "max_interval", ND_RA_MAX_INTERVAL_DEFAULT
);
4728 if (max_interval
> ND_RA_MAX_INTERVAL_MAX
) {
4729 max_interval
= ND_RA_MAX_INTERVAL_MAX
;
4731 if (max_interval
< ND_RA_MAX_INTERVAL_MIN
) {
4732 max_interval
= ND_RA_MAX_INTERVAL_MIN
;
4734 smap_add_format(&options
, "ipv6_ra_max_interval", "%d", max_interval
);
/* min_interval defaults relative to max_interval and is clamped into
 * [ND_RA_MIN_INTERVAL_MIN, ND_RA_MIN_INTERVAL_MAX(max_interval)]. */
4736 int min_interval
= smap_get_int(&op
->nbrp
->ipv6_ra_configs
,
4737 "min_interval", nd_ra_min_interval_default(max_interval
));
4738 if (min_interval
> ND_RA_MIN_INTERVAL_MAX(max_interval
)) {
4739 min_interval
= ND_RA_MIN_INTERVAL_MAX(max_interval
);
4741 if (min_interval
< ND_RA_MIN_INTERVAL_MIN
) {
4742 min_interval
= ND_RA_MIN_INTERVAL_MIN
;
4744 smap_add_format(&options
, "ipv6_ra_min_interval", "%d", min_interval
);
4746 int mtu
= smap_get_int(&op
->nbrp
->ipv6_ra_configs
, "mtu", ND_MTU_DEFAULT
);
4747 /* RFC 2460 requires the MTU for IPv6 to be at least 1280 */
4748 if (mtu
&& mtu
>= 1280) {
4749 smap_add_format(&options
, "ipv6_ra_mtu", "%d", mtu
);
/* Collect "prefix/plen " entries; a link-local address instead becomes
 * the RA source address. */
4752 struct ds s
= DS_EMPTY_INITIALIZER
;
4753 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; ++i
) {
4754 struct ipv6_netaddr
*addrs
= &op
->lrp_networks
.ipv6_addrs
[i
];
4755 if (in6_is_lla(&addrs
->network
)) {
4756 smap_add(&options
, "ipv6_ra_src_addr", addrs
->addr_s
);
4759 ds_put_format(&s
, "%s/%u ", addrs
->network_s
, addrs
->plen
);
4761 /* Remove trailing space */
4763 smap_add(&options
, "ipv6_ra_prefixes", ds_cstr(&s
));
4766 smap_add(&options
, "ipv6_ra_src_eth", op
->lrp_networks
.ea_s
);
4768 sbrec_port_binding_set_options(op
->sb
, &options
);
4769 smap_destroy(&options
);
4773 build_lrouter_flows(struct hmap
*datapaths
, struct hmap
*ports
,
4774 struct hmap
*lflows
)
4776 /* This flow table structure is documented in ovn-northd(8), so please
4777 * update ovn-northd.8.xml if you change anything. */
4779 struct ds match
= DS_EMPTY_INITIALIZER
;
4780 struct ds actions
= DS_EMPTY_INITIALIZER
;
4782 /* Logical router ingress table 0: Admission control framework. */
4783 struct ovn_datapath
*od
;
4784 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4789 /* Logical VLANs not supported.
4790 * Broadcast/multicast source address is invalid. */
4791 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ADMISSION
, 100,
4792 "vlan.present || eth.src[40]", "drop;");
4795 /* Logical router ingress table 0: match (priority 50). */
4796 struct ovn_port
*op
;
4797 HMAP_FOR_EACH (op
, key_node
, ports
) {
4802 if (!lrport_is_enabled(op
->nbrp
)) {
4803 /* Drop packets from disabled logical ports (since logical flow
4804 * tables are default-drop). */
4809 /* No ingress packets should be received on a chassisredirect
4815 ds_put_format(&match
, "eth.mcast && inport == %s", op
->json_key
);
4816 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ADMISSION
, 50,
4817 ds_cstr(&match
), "next;");
4820 ds_put_format(&match
, "eth.dst == %s && inport == %s",
4821 op
->lrp_networks
.ea_s
, op
->json_key
);
4822 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
4823 && op
->od
->l3redirect_port
) {
4824 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
4825 * should only be received on the "redirect-chassis". */
4826 ds_put_format(&match
, " && is_chassis_resident(%s)",
4827 op
->od
->l3redirect_port
->json_key
);
4829 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ADMISSION
, 50,
4830 ds_cstr(&match
), "next;");
4833 /* Logical router ingress table 1: IP Input. */
4834 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4839 /* L3 admission control: drop multicast and broadcast source, localhost
4840 * source or destination, and zero network source or destination
4841 * (priority 100). */
4842 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 100,
4844 "ip4.src == 255.255.255.255 || "
4845 "ip4.src == 127.0.0.0/8 || "
4846 "ip4.dst == 127.0.0.0/8 || "
4847 "ip4.src == 0.0.0.0/8 || "
4848 "ip4.dst == 0.0.0.0/8",
4851 /* ARP reply handling. Use ARP replies to populate the logical
4852 * router's ARP table. */
4853 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 90, "arp.op == 2",
4854 "put_arp(inport, arp.spa, arp.sha);");
4856 /* Drop Ethernet local broadcast. By definition this traffic should
4857 * not be forwarded.*/
4858 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 50,
4859 "eth.bcast", "drop;");
4863 ds_put_cstr(&match
, "ip4 && ip.ttl == {0, 1}");
4864 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 30,
4865 ds_cstr(&match
), "drop;");
4867 /* ND advertisement handling. Use advertisements to populate
4868 * the logical router's ARP/ND table. */
4869 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 90, "nd_na",
4870 "put_nd(inport, nd.target, nd.tll);");
4872 /* Lean from neighbor solicitations that were not directed at
4873 * us. (A priority-90 flow will respond to requests to us and
4874 * learn the sender's mac address. */
4875 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 80, "nd_ns",
4876 "put_nd(inport, ip6.src, nd.sll);");
4878 /* Pass other traffic not already handled to the next table for
4880 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 0, "1", "next;");
4883 /* Logical router ingress table 1: IP Input for IPv4. */
4884 HMAP_FOR_EACH (op
, key_node
, ports
) {
4890 /* No ingress packets are accepted on a chassisredirect
4891 * port, so no need to program flows for that port. */
4895 if (op
->lrp_networks
.n_ipv4_addrs
) {
4896 /* L3 admission control: drop packets that originate from an
4897 * IPv4 address owned by the router or a broadcast address
4898 * known to the router (priority 100). */
4900 ds_put_cstr(&match
, "ip4.src == ");
4901 op_put_v4_networks(&match
, op
, true);
4902 ds_put_cstr(&match
, " && "REGBIT_EGRESS_LOOPBACK
" == 0");
4903 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 100,
4904 ds_cstr(&match
), "drop;");
4906 /* ICMP echo reply. These flows reply to ICMP echo requests
4907 * received for the router's IP address. Since packets only
4908 * get here as part of the logical router datapath, the inport
4909 * (i.e. the incoming locally attached net) does not matter.
4910 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
4912 ds_put_cstr(&match
, "ip4.dst == ");
4913 op_put_v4_networks(&match
, op
, false);
4914 ds_put_cstr(&match
, " && icmp4.type == 8 && icmp4.code == 0");
4917 ds_put_format(&actions
,
4918 "ip4.dst <-> ip4.src; "
4921 "flags.loopback = 1; "
4923 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4924 ds_cstr(&match
), ds_cstr(&actions
));
4927 /* ICMP time exceeded */
4928 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4932 ds_put_format(&match
,
4933 "inport == %s && ip4 && "
4934 "ip.ttl == {0, 1} && !ip.later_frag", op
->json_key
);
4935 ds_put_format(&actions
,
4937 "eth.dst <-> eth.src; "
4938 "icmp4.type = 11; /* Time exceeded */ "
4939 "icmp4.code = 0; /* TTL exceeded in transit */ "
4940 "ip4.dst = ip4.src; "
4944 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
4945 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 40,
4946 ds_cstr(&match
), ds_cstr(&actions
));
4949 /* ARP reply. These flows reply to ARP requests for the router's own
4951 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4953 ds_put_format(&match
,
4954 "inport == %s && arp.tpa == %s && arp.op == 1",
4955 op
->json_key
, op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
4956 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
4957 && op
->od
->l3redirect_port
) {
4958 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4959 * should only be sent from the "redirect-chassis", so that
4960 * upstream MAC learning points to the "redirect-chassis".
4961 * Also need to avoid generation of multiple ARP responses
4962 * from different chassis. */
4963 ds_put_format(&match
, " && is_chassis_resident(%s)",
4964 op
->od
->l3redirect_port
->json_key
);
4968 ds_put_format(&actions
,
4969 "eth.dst = eth.src; "
4971 "arp.op = 2; /* ARP reply */ "
4972 "arp.tha = arp.sha; "
4974 "arp.tpa = arp.spa; "
4977 "flags.loopback = 1; "
4979 op
->lrp_networks
.ea_s
,
4980 op
->lrp_networks
.ea_s
,
4981 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
,
4983 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4984 ds_cstr(&match
), ds_cstr(&actions
));
4987 /* A set to hold all load-balancer vips that need ARP responses. */
4988 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
4990 get_router_load_balancer_ips(op
->od
, &all_ips
, &addr_family
);
4992 const char *ip_address
;
4993 SSET_FOR_EACH(ip_address
, &all_ips
) {
4995 if (addr_family
== AF_INET
) {
4996 ds_put_format(&match
,
4997 "inport == %s && arp.tpa == %s && arp.op == 1",
4998 op
->json_key
, ip_address
);
5000 ds_put_format(&match
,
5001 "inport == %s && nd_ns && nd.target == %s",
5002 op
->json_key
, ip_address
);
5006 if (addr_family
== AF_INET
) {
5007 ds_put_format(&actions
,
5008 "eth.dst = eth.src; "
5010 "arp.op = 2; /* ARP reply */ "
5011 "arp.tha = arp.sha; "
5013 "arp.tpa = arp.spa; "
5016 "flags.loopback = 1; "
5018 op
->lrp_networks
.ea_s
,
5019 op
->lrp_networks
.ea_s
,
5023 ds_put_format(&actions
,
5029 "outport = inport; "
5030 "flags.loopback = 1; "
5033 op
->lrp_networks
.ea_s
,
5036 op
->lrp_networks
.ea_s
);
5038 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
5039 ds_cstr(&match
), ds_cstr(&actions
));
5042 sset_destroy(&all_ips
);
5044 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
5045 * LBed traffic respectively to be SNATed. In addition, there can be
5046 * a number of SNAT rules in the NAT table. */
5047 ovs_be32
*snat_ips
= xmalloc(sizeof *snat_ips
*
5048 (op
->od
->nbr
->n_nat
+ 2));
5049 size_t n_snat_ips
= 0;
5052 const char *dnat_force_snat_ip
= get_force_snat_ip(op
->od
, "dnat",
5054 if (dnat_force_snat_ip
) {
5055 snat_ips
[n_snat_ips
++] = snat_ip
;
5058 const char *lb_force_snat_ip
= get_force_snat_ip(op
->od
, "lb",
5060 if (lb_force_snat_ip
) {
5061 snat_ips
[n_snat_ips
++] = snat_ip
;
5064 for (int i
= 0; i
< op
->od
->nbr
->n_nat
; i
++) {
5065 const struct nbrec_nat
*nat
;
5067 nat
= op
->od
->nbr
->nat
[i
];
5070 if (!ip_parse(nat
->external_ip
, &ip
) || !ip
) {
5071 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
5072 VLOG_WARN_RL(&rl
, "bad ip address %s in nat configuration "
5073 "for router %s", nat
->external_ip
, op
->key
);
5077 if (!strcmp(nat
->type
, "snat")) {
5078 snat_ips
[n_snat_ips
++] = ip
;
5082 /* ARP handling for external IP addresses.
5084 * DNAT IP addresses are external IP addresses that need ARP
5087 ds_put_format(&match
,
5088 "inport == %s && arp.tpa == "IP_FMT
" && arp.op == 1",
5089 op
->json_key
, IP_ARGS(ip
));
5092 ds_put_format(&actions
,
5093 "eth.dst = eth.src; "
5094 "arp.op = 2; /* ARP reply */ "
5095 "arp.tha = arp.sha; ");
5097 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
) {
5098 struct eth_addr mac
;
5099 if (nat
->external_mac
&&
5100 eth_addr_from_string(nat
->external_mac
, &mac
)
5101 && nat
->logical_port
) {
5102 /* distributed NAT case, use nat->external_mac */
5103 ds_put_format(&actions
,
5104 "eth.src = "ETH_ADDR_FMT
"; "
5105 "arp.sha = "ETH_ADDR_FMT
"; ",
5107 ETH_ADDR_ARGS(mac
));
5108 /* Traffic with eth.src = nat->external_mac should only be
5109 * sent from the chassis where nat->logical_port is
5110 * resident, so that upstream MAC learning points to the
5111 * correct chassis. Also need to avoid generation of
5112 * multiple ARP responses from different chassis. */
5113 ds_put_format(&match
, " && is_chassis_resident(\"%s\")",
5116 ds_put_format(&actions
,
5119 op
->lrp_networks
.ea_s
,
5120 op
->lrp_networks
.ea_s
);
5121 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5122 * should only be sent from the "redirect-chassis", so that
5123 * upstream MAC learning points to the "redirect-chassis".
5124 * Also need to avoid generation of multiple ARP responses
5125 * from different chassis. */
5126 if (op
->od
->l3redirect_port
) {
5127 ds_put_format(&match
, " && is_chassis_resident(%s)",
5128 op
->od
->l3redirect_port
->json_key
);
5132 ds_put_format(&actions
,
5135 op
->lrp_networks
.ea_s
,
5136 op
->lrp_networks
.ea_s
);
5138 ds_put_format(&actions
,
5139 "arp.tpa = arp.spa; "
5140 "arp.spa = "IP_FMT
"; "
5142 "flags.loopback = 1; "
5146 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
5147 ds_cstr(&match
), ds_cstr(&actions
));
5150 if (!smap_get(&op
->od
->nbr
->options
, "chassis")
5151 && !op
->od
->l3dgw_port
) {
5152 /* UDP/TCP port unreachable. */
5153 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
5155 ds_put_format(&match
,
5156 "ip4 && ip4.dst == %s && !ip.later_frag && udp",
5157 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
5158 const char *action
= "icmp4 {"
5159 "eth.dst <-> eth.src; "
5160 "ip4.dst <-> ip4.src; "
5165 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 80,
5166 ds_cstr(&match
), action
);
5169 ds_put_format(&match
,
5170 "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
5171 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
5172 action
= "tcp_reset {"
5173 "eth.dst <-> eth.src; "
5174 "ip4.dst <-> ip4.src; "
5176 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 80,
5177 ds_cstr(&match
), action
);
5180 ds_put_format(&match
,
5181 "ip4 && ip4.dst == %s && !ip.later_frag",
5182 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
5184 "eth.dst <-> eth.src; "
5185 "ip4.dst <-> ip4.src; "
5190 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 70,
5191 ds_cstr(&match
), action
);
5196 ds_put_cstr(&match
, "ip4.dst == {");
5197 bool has_drop_ips
= false;
5198 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
5199 bool snat_ip_is_router_ip
= false;
5200 for (int j
= 0; j
< n_snat_ips
; j
++) {
5201 /* Packets to SNAT IPs should not be dropped. */
5202 if (op
->lrp_networks
.ipv4_addrs
[i
].addr
== snat_ips
[j
]) {
5203 snat_ip_is_router_ip
= true;
5207 if (snat_ip_is_router_ip
) {
5210 ds_put_format(&match
, "%s, ",
5211 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
5212 has_drop_ips
= true;
5214 ds_chomp(&match
, ' ');
5215 ds_chomp(&match
, ',');
5216 ds_put_cstr(&match
, "}");
5219 /* Drop IP traffic to this router. */
5220 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 60,
5221 ds_cstr(&match
), "drop;");
5227 /* Logical router ingress table 1: IP Input for IPv6. */
5228 HMAP_FOR_EACH (op
, key_node
, ports
) {
5234 /* No ingress packets are accepted on a chassisredirect
5235 * port, so no need to program flows for that port. */
5239 if (op
->lrp_networks
.n_ipv6_addrs
) {
5240 /* L3 admission control: drop packets that originate from an
5241 * IPv6 address owned by the router (priority 100). */
5243 ds_put_cstr(&match
, "ip6.src == ");
5244 op_put_v6_networks(&match
, op
);
5245 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 100,
5246 ds_cstr(&match
), "drop;");
5248 /* ICMPv6 echo reply. These flows reply to echo requests
5249 * received for the router's IP address. */
5251 ds_put_cstr(&match
, "ip6.dst == ");
5252 op_put_v6_networks(&match
, op
);
5253 ds_put_cstr(&match
, " && icmp6.type == 128 && icmp6.code == 0");
5256 ds_put_cstr(&actions
,
5257 "ip6.dst <-> ip6.src; "
5259 "icmp6.type = 129; "
5260 "flags.loopback = 1; "
5262 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
5263 ds_cstr(&match
), ds_cstr(&actions
));
5265 /* Drop IPv6 traffic to this router. */
5267 ds_put_cstr(&match
, "ip6.dst == ");
5268 op_put_v6_networks(&match
, op
);
5269 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 60,
5270 ds_cstr(&match
), "drop;");
5273 /* ND reply. These flows reply to ND solicitations for the
5274 * router's own IP address. */
5275 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
5277 ds_put_format(&match
,
5278 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
5279 "&& nd.target == %s",
5281 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
5282 op
->lrp_networks
.ipv6_addrs
[i
].sn_addr_s
,
5283 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
);
5284 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
5285 && op
->od
->l3redirect_port
) {
5286 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
5287 * should only be sent from the "redirect-chassis", so that
5288 * upstream MAC learning points to the "redirect-chassis".
5289 * Also need to avoid generation of multiple ND replies
5290 * from different chassis. */
5291 ds_put_format(&match
, " && is_chassis_resident(%s)",
5292 op
->od
->l3redirect_port
->json_key
);
5296 ds_put_format(&actions
,
5297 "put_nd(inport, ip6.src, nd.sll); "
5303 "outport = inport; "
5304 "flags.loopback = 1; "
5307 op
->lrp_networks
.ea_s
,
5308 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
5309 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
5310 op
->lrp_networks
.ea_s
);
5311 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
5312 ds_cstr(&match
), ds_cstr(&actions
));
5315 /* TCP port unreachable */
5316 if (!smap_get(&op
->od
->nbr
->options
, "chassis")
5317 && !op
->od
->l3dgw_port
) {
5318 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
5320 ds_put_format(&match
,
5321 "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
5322 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
);
5323 const char *action
= "tcp_reset {"
5324 "eth.dst <-> eth.src; "
5325 "ip6.dst <-> ip6.src; "
5327 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 80,
5328 ds_cstr(&match
), action
);
5333 /* NAT, Defrag and load balancing. */
5334 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5339 /* Packets are allowed by default. */
5340 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DEFRAG
, 0, "1", "next;");
5341 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 0, "1", "next;");
5342 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
, 0, "1", "next;");
5343 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 0, "1", "next;");
5344 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 0, "1", "next;");
5345 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_EGR_LOOP
, 0, "1", "next;");
5347 /* NAT rules are only valid on Gateway routers and routers with
5348 * l3dgw_port (router has a port with "redirect-chassis"
5350 if (!smap_get(&od
->nbr
->options
, "chassis") && !od
->l3dgw_port
) {
5355 const char *dnat_force_snat_ip
= get_force_snat_ip(od
, "dnat",
5357 const char *lb_force_snat_ip
= get_force_snat_ip(od
, "lb",
5360 for (int i
= 0; i
< od
->nbr
->n_nat
; i
++) {
5361 const struct nbrec_nat
*nat
;
5363 nat
= od
->nbr
->nat
[i
];
5367 char *error
= ip_parse_masked(nat
->external_ip
, &ip
, &mask
);
5368 if (error
|| mask
!= OVS_BE32_MAX
) {
5369 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
5370 VLOG_WARN_RL(&rl
, "bad external ip %s for nat",
5376 /* Check the validity of nat->logical_ip. 'logical_ip' can
5377 * be a subnet when the type is "snat". */
5378 error
= ip_parse_masked(nat
->logical_ip
, &ip
, &mask
);
5379 if (!strcmp(nat
->type
, "snat")) {
5381 static struct vlog_rate_limit rl
=
5382 VLOG_RATE_LIMIT_INIT(5, 1);
5383 VLOG_WARN_RL(&rl
, "bad ip network or ip %s for snat "
5384 "in router "UUID_FMT
"",
5385 nat
->logical_ip
, UUID_ARGS(&od
->key
));
5390 if (error
|| mask
!= OVS_BE32_MAX
) {
5391 static struct vlog_rate_limit rl
=
5392 VLOG_RATE_LIMIT_INIT(5, 1);
5393 VLOG_WARN_RL(&rl
, "bad ip %s for dnat in router "
5394 ""UUID_FMT
"", nat
->logical_ip
, UUID_ARGS(&od
->key
));
5400 /* For distributed router NAT, determine whether this NAT rule
5401 * satisfies the conditions for distributed NAT processing. */
5402 bool distributed
= false;
5403 struct eth_addr mac
;
5404 if (od
->l3dgw_port
&& !strcmp(nat
->type
, "dnat_and_snat") &&
5405 nat
->logical_port
&& nat
->external_mac
) {
5406 if (eth_addr_from_string(nat
->external_mac
, &mac
)) {
5409 static struct vlog_rate_limit rl
=
5410 VLOG_RATE_LIMIT_INIT(5, 1);
5411 VLOG_WARN_RL(&rl
, "bad mac %s for dnat in router "
5412 ""UUID_FMT
"", nat
->external_mac
, UUID_ARGS(&od
->key
));
5417 /* Ingress UNSNAT table: It is for already established connections'
5418 * reverse traffic. i.e., SNAT has already been done in egress
5419 * pipeline and now the packet has entered the ingress pipeline as
5420 * part of a reply. We undo the SNAT here.
5422 * Undoing SNAT has to happen before DNAT processing. This is
5423 * because when the packet was DNATed in ingress pipeline, it did
5424 * not know about the possibility of eventual additional SNAT in
5425 * egress pipeline. */
5426 if (!strcmp(nat
->type
, "snat")
5427 || !strcmp(nat
->type
, "dnat_and_snat")) {
5428 if (!od
->l3dgw_port
) {
5429 /* Gateway router. */
5431 ds_put_format(&match
, "ip && ip4.dst == %s",
5433 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 90,
5434 ds_cstr(&match
), "ct_snat;");
5436 /* Distributed router. */
5438 /* Traffic received on l3dgw_port is subject to NAT. */
5440 ds_put_format(&match
, "ip && ip4.dst == %s"
5443 od
->l3dgw_port
->json_key
);
5444 if (!distributed
&& od
->l3redirect_port
) {
5445 /* Flows for NAT rules that are centralized are only
5446 * programmed on the "redirect-chassis". */
5447 ds_put_format(&match
, " && is_chassis_resident(%s)",
5448 od
->l3redirect_port
->json_key
);
5450 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 100,
5451 ds_cstr(&match
), "ct_snat;");
5453 /* Traffic received on other router ports must be
5454 * redirected to the central instance of the l3dgw_port
5455 * for NAT processing. */
5457 ds_put_format(&match
, "ip && ip4.dst == %s",
5459 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 50,
5461 REGBIT_NAT_REDIRECT
" = 1; next;");
5465 /* Ingress DNAT table: Packets enter the pipeline with destination
5466 * IP address that needs to be DNATted from a external IP address
5467 * to a logical IP address. */
5468 if (!strcmp(nat
->type
, "dnat")
5469 || !strcmp(nat
->type
, "dnat_and_snat")) {
5470 if (!od
->l3dgw_port
) {
5471 /* Gateway router. */
5472 /* Packet when it goes from the initiator to destination.
5473 * We need to set flags.loopback because the router can
5474 * send the packet back through the same interface. */
5476 ds_put_format(&match
, "ip && ip4.dst == %s",
5479 if (dnat_force_snat_ip
) {
5480 /* Indicate to the future tables that a DNAT has taken
5481 * place and a force SNAT needs to be done in the
5482 * Egress SNAT table. */
5483 ds_put_format(&actions
,
5484 "flags.force_snat_for_dnat = 1; ");
5486 ds_put_format(&actions
, "flags.loopback = 1; ct_dnat(%s);",
5488 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 100,
5489 ds_cstr(&match
), ds_cstr(&actions
));
5491 /* Distributed router. */
5493 /* Traffic received on l3dgw_port is subject to NAT. */
5495 ds_put_format(&match
, "ip && ip4.dst == %s"
5498 od
->l3dgw_port
->json_key
);
5499 if (!distributed
&& od
->l3redirect_port
) {
5500 /* Flows for NAT rules that are centralized are only
5501 * programmed on the "redirect-chassis". */
5502 ds_put_format(&match
, " && is_chassis_resident(%s)",
5503 od
->l3redirect_port
->json_key
);
5506 ds_put_format(&actions
, "ct_dnat(%s);",
5508 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 100,
5509 ds_cstr(&match
), ds_cstr(&actions
));
5511 /* Traffic received on other router ports must be
5512 * redirected to the central instance of the l3dgw_port
5513 * for NAT processing. */
5515 ds_put_format(&match
, "ip && ip4.dst == %s",
5517 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 50,
5519 REGBIT_NAT_REDIRECT
" = 1; next;");
5523 /* Egress UNDNAT table: It is for already established connections'
5524 * reverse traffic. i.e., DNAT has already been done in ingress
5525 * pipeline and now the packet has entered the egress pipeline as
5526 * part of a reply. We undo the DNAT here.
5528 * Note that this only applies for NAT on a distributed router.
5529 * Undo DNAT on a gateway router is done in the ingress DNAT
5530 * pipeline stage. */
5531 if (od
->l3dgw_port
&& (!strcmp(nat
->type
, "dnat")
5532 || !strcmp(nat
->type
, "dnat_and_snat"))) {
5534 ds_put_format(&match
, "ip && ip4.src == %s"
5535 " && outport == %s",
5537 od
->l3dgw_port
->json_key
);
5538 if (!distributed
&& od
->l3redirect_port
) {
5539 /* Flows for NAT rules that are centralized are only
5540 * programmed on the "redirect-chassis". */
5541 ds_put_format(&match
, " && is_chassis_resident(%s)",
5542 od
->l3redirect_port
->json_key
);
5546 ds_put_format(&actions
, "eth.src = "ETH_ADDR_FMT
"; ",
5547 ETH_ADDR_ARGS(mac
));
5549 ds_put_format(&actions
, "ct_dnat;");
5550 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 100,
5551 ds_cstr(&match
), ds_cstr(&actions
));
5554 /* Egress SNAT table: Packets enter the egress pipeline with
5555 * source ip address that needs to be SNATted to a external ip
5557 if (!strcmp(nat
->type
, "snat")
5558 || !strcmp(nat
->type
, "dnat_and_snat")) {
5559 if (!od
->l3dgw_port
) {
5560 /* Gateway router. */
5562 ds_put_format(&match
, "ip && ip4.src == %s",
5565 ds_put_format(&actions
, "ct_snat(%s);", nat
->external_ip
);
5567 /* The priority here is calculated such that the
5568 * nat->logical_ip with the longest mask gets a higher
5570 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
,
5571 count_1bits(ntohl(mask
)) + 1,
5572 ds_cstr(&match
), ds_cstr(&actions
));
5574 /* Distributed router. */
5576 ds_put_format(&match
, "ip && ip4.src == %s"
5577 " && outport == %s",
5579 od
->l3dgw_port
->json_key
);
5580 if (!distributed
&& od
->l3redirect_port
) {
5581 /* Flows for NAT rules that are centralized are only
5582 * programmed on the "redirect-chassis". */
5583 ds_put_format(&match
, " && is_chassis_resident(%s)",
5584 od
->l3redirect_port
->json_key
);
5588 ds_put_format(&actions
, "eth.src = "ETH_ADDR_FMT
"; ",
5589 ETH_ADDR_ARGS(mac
));
5591 ds_put_format(&actions
, "ct_snat(%s);", nat
->external_ip
);
5593 /* The priority here is calculated such that the
5594 * nat->logical_ip with the longest mask gets a higher
5596 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
,
5597 count_1bits(ntohl(mask
)) + 1,
5598 ds_cstr(&match
), ds_cstr(&actions
));
5602 /* Logical router ingress table 0:
5603 * For NAT on a distributed router, add rules allowing
5604 * ingress traffic with eth.dst matching nat->external_mac
5605 * on the l3dgw_port instance where nat->logical_port is
5609 ds_put_format(&match
,
5610 "eth.dst == "ETH_ADDR_FMT
" && inport == %s"
5611 " && is_chassis_resident(\"%s\")",
5613 od
->l3dgw_port
->json_key
,
5615 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ADMISSION
, 50,
5616 ds_cstr(&match
), "next;");
5619 /* Ingress Gateway Redirect Table: For NAT on a distributed
5620 * router, add flows that are specific to a NAT rule. These
5621 * flows indicate the presence of an applicable NAT rule that
5622 * can be applied in a distributed manner. */
5625 ds_put_format(&match
, "ip4.src == %s && outport == %s",
5627 od
->l3dgw_port
->json_key
);
5628 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 100,
5629 ds_cstr(&match
), "next;");
5632 /* Egress Loopback table: For NAT on a distributed router.
5633 * If packets in the egress pipeline on the distributed
5634 * gateway port have ip.dst matching a NAT external IP, then
5635 * loop a clone of the packet back to the beginning of the
5636 * ingress pipeline with inport = outport. */
5637 if (od
->l3dgw_port
) {
5638 /* Distributed router. */
5640 ds_put_format(&match
, "ip4.dst == %s && outport == %s",
5642 od
->l3dgw_port
->json_key
);
5644 ds_put_format(&actions
,
5645 "clone { ct_clear; "
5646 "inport = outport; outport = \"\"; "
5647 "flags = 0; flags.loopback = 1; ");
5648 for (int j
= 0; j
< MFF_N_LOG_REGS
; j
++) {
5649 ds_put_format(&actions
, "reg%d = 0; ", j
);
5651 ds_put_format(&actions
, REGBIT_EGRESS_LOOPBACK
" = 1; "
5652 "next(pipeline=ingress, table=0); };");
5653 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_EGR_LOOP
, 100,
5654 ds_cstr(&match
), ds_cstr(&actions
));
5658 /* Handle force SNAT options set in the gateway router. */
5659 if (dnat_force_snat_ip
&& !od
->l3dgw_port
) {
5660 /* If a packet with destination IP address as that of the
5661 * gateway router (as set in options:dnat_force_snat_ip) is seen,
5664 ds_put_format(&match
, "ip && ip4.dst == %s", dnat_force_snat_ip
);
5665 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 110,
5666 ds_cstr(&match
), "ct_snat;");
5668 /* Higher priority rules to force SNAT with the IP addresses
5669 * configured in the Gateway router. This only takes effect
5670 * when the packet has already been DNATed once. */
5672 ds_put_format(&match
, "flags.force_snat_for_dnat == 1 && ip");
5674 ds_put_format(&actions
, "ct_snat(%s);", dnat_force_snat_ip
);
5675 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
, 100,
5676 ds_cstr(&match
), ds_cstr(&actions
));
5678 if (lb_force_snat_ip
&& !od
->l3dgw_port
) {
5679 /* If a packet with destination IP address as that of the
5680 * gateway router (as set in options:lb_force_snat_ip) is seen,
5683 ds_put_format(&match
, "ip && ip4.dst == %s", lb_force_snat_ip
);
5684 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 100,
5685 ds_cstr(&match
), "ct_snat;");
5687 /* Load balanced traffic will have flags.force_snat_for_lb set.
5690 ds_put_format(&match
, "flags.force_snat_for_lb == 1 && ip");
5692 ds_put_format(&actions
, "ct_snat(%s);", lb_force_snat_ip
);
5693 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
, 100,
5694 ds_cstr(&match
), ds_cstr(&actions
));
5697 if (!od
->l3dgw_port
) {
5698 /* For gateway router, re-circulate every packet through
5699 * the DNAT zone. This helps with the following.
5701 * Any packet that needs to be unDNATed in the reverse
5702 * direction gets unDNATed. Ideally this could be done in
5703 * the egress pipeline. But since the gateway router
5704 * does not have any feature that depends on the source
5705 * ip address being external IP address for IP routing,
5706 * we can do it here, saving a future re-circulation. */
5707 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 50,
5708 "ip", "flags.loopback = 1; ct_dnat;");
5710 /* For NAT on a distributed router, add flows to Ingress
5711 * IP Routing table, Ingress ARP Resolution table, and
5712 * Ingress Gateway Redirect Table that are not specific to a
5715 /* The highest priority IN_IP_ROUTING rule matches packets
5716 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5717 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
5718 * will take care of setting the outport. */
5719 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_ROUTING
, 300,
5720 REGBIT_NAT_REDIRECT
" == 1", "ip.ttl--; next;");
5722 /* The highest priority IN_ARP_RESOLVE rule matches packets
5723 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5724 * then sets eth.dst to the distributed gateway port's
5725 * ethernet address. */
5727 ds_put_format(&actions
, "eth.dst = %s; next;",
5728 od
->l3dgw_port
->lrp_networks
.ea_s
);
5729 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 200,
5730 REGBIT_NAT_REDIRECT
" == 1", ds_cstr(&actions
));
5732 /* The highest priority IN_GW_REDIRECT rule redirects packets
5733 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
5734 * the central instance of the l3dgw_port for NAT processing. */
5736 ds_put_format(&actions
, "outport = %s; next;",
5737 od
->l3redirect_port
->json_key
);
5738 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 200,
5739 REGBIT_NAT_REDIRECT
" == 1", ds_cstr(&actions
));
5742 /* Load balancing and packet defrag are only valid on
5743 * Gateway routers or router with gateway port. */
5744 if (!smap_get(&od
->nbr
->options
, "chassis") && !od
->l3dgw_port
) {
5748 /* A set to hold all ips that need defragmentation and tracking. */
5749 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
5751 for (int i
= 0; i
< od
->nbr
->n_load_balancer
; i
++) {
5752 struct nbrec_load_balancer
*lb
= od
->nbr
->load_balancer
[i
];
5753 struct smap
*vips
= &lb
->vips
;
5754 struct smap_node
*node
;
5756 SMAP_FOR_EACH (node
, vips
) {
5760 /* node->key contains IP:port or just IP. */
5761 char *ip_address
= NULL
;
5762 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
5768 if (!sset_contains(&all_ips
, ip_address
)) {
5769 sset_add(&all_ips
, ip_address
);
5770 /* If there are any load balancing rules, we should send
5771 * the packet to conntrack for defragmentation and
5772 * tracking. This helps with two things.
5774 * 1. With tracking, we can send only new connections to
5775 * pick a DNAT ip address from a group.
5776 * 2. If there are L4 ports in load balancing rules, we
5777 * need the defragmentation to match on L4 ports. */
5779 if (addr_family
== AF_INET
) {
5780 ds_put_format(&match
, "ip && ip4.dst == %s",
5783 ds_put_format(&match
, "ip && ip6.dst == %s",
5786 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DEFRAG
,
5787 100, ds_cstr(&match
), "ct_next;");
5790 /* Higher priority rules are added for load-balancing in DNAT
5791 * table. For every match (on a VIP[:port]), we add two flows
5792 * via add_router_lb_flow(). One flow is for specific matching
5793 * on ct.new with an action of "ct_lb($targets);". The other
5794 * flow is for ct.est with an action of "ct_dnat;". */
5796 ds_put_format(&actions
, "ct_lb(%s);", node
->value
);
5799 if (addr_family
== AF_INET
) {
5800 ds_put_format(&match
, "ip && ip4.dst == %s",
5803 ds_put_format(&match
, "ip && ip6.dst == %s",
5809 bool is_udp
= lb
->protocol
&& !strcmp(lb
->protocol
, "udp") ?
5813 ds_put_format(&match
, " && udp && udp.dst == %d",
5816 ds_put_format(&match
, " && tcp && tcp.dst == %d",
5822 if (od
->l3redirect_port
) {
5823 ds_put_format(&match
, " && is_chassis_resident(%s)",
5824 od
->l3redirect_port
->json_key
);
5826 add_router_lb_flow(lflows
, od
, &match
, &actions
, prio
,
5827 lb_force_snat_ip
, node
->value
, is_udp
,
5831 sset_destroy(&all_ips
);
5834 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
5836 HMAP_FOR_EACH (op
, key_node
, ports
) {
5837 if (!op
->nbrp
|| op
->nbrp
->peer
|| !op
->peer
) {
5841 if (!op
->lrp_networks
.n_ipv6_addrs
) {
5845 const char *address_mode
= smap_get(
5846 &op
->nbrp
->ipv6_ra_configs
, "address_mode");
5848 if (!address_mode
) {
5851 if (strcmp(address_mode
, "slaac") &&
5852 strcmp(address_mode
, "dhcpv6_stateful") &&
5853 strcmp(address_mode
, "dhcpv6_stateless")) {
5854 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
5855 VLOG_WARN_RL(&rl
, "Invalid address mode [%s] defined",
5860 if (smap_get_bool(&op
->nbrp
->ipv6_ra_configs
, "send_periodic",
5862 copy_ra_to_sb(op
, address_mode
);
5866 ds_put_format(&match
, "inport == %s && ip6.dst == ff02::2 && nd_rs",
5870 const char *mtu_s
= smap_get(
5871 &op
->nbrp
->ipv6_ra_configs
, "mtu");
5873 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
5874 uint32_t mtu
= (mtu_s
&& atoi(mtu_s
) >= 1280) ? atoi(mtu_s
) : 0;
5876 ds_put_format(&actions
, REGBIT_ND_RA_OPTS_RESULT
" = put_nd_ra_opts("
5877 "addr_mode = \"%s\", slla = %s",
5878 address_mode
, op
->lrp_networks
.ea_s
);
5880 ds_put_format(&actions
, ", mtu = %u", mtu
);
5883 bool add_rs_response_flow
= false;
5885 for (size_t i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
5886 if (in6_is_lla(&op
->lrp_networks
.ipv6_addrs
[i
].network
)) {
5890 /* Add the prefix option if the address mode is slaac or
5891 * dhcpv6_stateless. */
5892 if (strcmp(address_mode
, "dhcpv6_stateful")) {
5893 ds_put_format(&actions
, ", prefix = %s/%u",
5894 op
->lrp_networks
.ipv6_addrs
[i
].network_s
,
5895 op
->lrp_networks
.ipv6_addrs
[i
].plen
);
5897 add_rs_response_flow
= true;
5900 if (add_rs_response_flow
) {
5901 ds_put_cstr(&actions
, "); next;");
5902 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ND_RA_OPTIONS
, 50,
5903 ds_cstr(&match
), ds_cstr(&actions
));
5906 ds_put_format(&match
, "inport == %s && ip6.dst == ff02::2 && "
5907 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT
, op
->json_key
);
5909 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
5910 struct in6_addr lla
;
5911 in6_generate_lla(op
->lrp_networks
.ea
, &lla
);
5912 memset(ip6_str
, 0, sizeof(ip6_str
));
5913 ipv6_string_mapped(ip6_str
, &lla
);
5914 ds_put_format(&actions
, "eth.dst = eth.src; eth.src = %s; "
5915 "ip6.dst = ip6.src; ip6.src = %s; "
5916 "outport = inport; flags.loopback = 1; "
5918 op
->lrp_networks
.ea_s
, ip6_str
);
5919 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ND_RA_RESPONSE
, 50,
5920 ds_cstr(&match
), ds_cstr(&actions
));
5924 /* Logical router ingress table 5, 6: RS responder, by default goto next.
5926 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5931 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ND_RA_OPTIONS
, 0, "1", "next;");
5932 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ND_RA_RESPONSE
, 0, "1", "next;");
5935 /* Logical router ingress table 7: IP Routing.
5937 * A packet that arrives at this table is an IP packet that should be
5938 * routed to the address in 'ip[46].dst'. This table sets outport to
5939 * the correct output port, eth.src to the output port's MAC
5940 * address, and '[xx]reg0' to the next-hop IP address (leaving
5941 * 'ip[46].dst', the packet’s final destination, unchanged), and
5942 * advances to the next table for ARP/ND resolution. */
5943 HMAP_FOR_EACH (op
, key_node
, ports
) {
5948 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
5949 add_route(lflows
, op
, op
->lrp_networks
.ipv4_addrs
[i
].addr_s
,
5950 op
->lrp_networks
.ipv4_addrs
[i
].network_s
,
5951 op
->lrp_networks
.ipv4_addrs
[i
].plen
, NULL
, NULL
);
5954 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
5955 add_route(lflows
, op
, op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
5956 op
->lrp_networks
.ipv6_addrs
[i
].network_s
,
5957 op
->lrp_networks
.ipv6_addrs
[i
].plen
, NULL
, NULL
);
5961 /* Convert the static routes to flows. */
5962 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5967 for (int i
= 0; i
< od
->nbr
->n_static_routes
; i
++) {
5968 const struct nbrec_logical_router_static_route
*route
;
5970 route
= od
->nbr
->static_routes
[i
];
5971 build_static_route_flow(lflows
, od
, ports
, route
);
5975 /* XXX destination unreachable */
5977 /* Local router ingress table 8: ARP Resolution.
5979 * Any packet that reaches this table is an IP packet whose next-hop IP
5980 * address is in reg0. (ip4.dst is the final destination.) This table
5981 * resolves the IP address in reg0 into an output port in outport and an
5982 * Ethernet address in eth.dst. */
5983 HMAP_FOR_EACH (op
, key_node
, ports
) {
5984 if (op
->nbsp
&& !lsp_is_enabled(op
->nbsp
)) {
5989 /* This is a logical router port. If next-hop IP address in
5990 * '[xx]reg0' matches IP address of this router port, then
5991 * the packet is intended to eventually be sent to this
5992 * logical port. Set the destination mac address using this
5993 * port's mac address.
5995 * The packet is still in peer's logical pipeline. So the match
5996 * should be on peer's outport. */
5997 if (op
->peer
&& op
->nbrp
->peer
) {
5998 if (op
->lrp_networks
.n_ipv4_addrs
) {
6000 ds_put_format(&match
, "outport == %s && reg0 == ",
6001 op
->peer
->json_key
);
6002 op_put_v4_networks(&match
, op
, false);
6005 ds_put_format(&actions
, "eth.dst = %s; next;",
6006 op
->lrp_networks
.ea_s
);
6007 ovn_lflow_add(lflows
, op
->peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
6008 100, ds_cstr(&match
), ds_cstr(&actions
));
6011 if (op
->lrp_networks
.n_ipv6_addrs
) {
6013 ds_put_format(&match
, "outport == %s && xxreg0 == ",
6014 op
->peer
->json_key
);
6015 op_put_v6_networks(&match
, op
);
6018 ds_put_format(&actions
, "eth.dst = %s; next;",
6019 op
->lrp_networks
.ea_s
);
6020 ovn_lflow_add(lflows
, op
->peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
6021 100, ds_cstr(&match
), ds_cstr(&actions
));
6024 } else if (op
->od
->n_router_ports
&& strcmp(op
->nbsp
->type
, "router")) {
6025 /* This is a logical switch port that backs a VM or a container.
6026 * Extract its addresses. For each of the address, go through all
6027 * the router ports attached to the switch (to which this port
6028 * connects) and if the address in question is reachable from the
6029 * router port, add an ARP/ND entry in that router's pipeline. */
6031 for (size_t i
= 0; i
< op
->n_lsp_addrs
; i
++) {
6032 const char *ea_s
= op
->lsp_addrs
[i
].ea_s
;
6033 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv4_addrs
; j
++) {
6034 const char *ip_s
= op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr_s
;
6035 for (size_t k
= 0; k
< op
->od
->n_router_ports
; k
++) {
6036 /* Get the Logical_Router_Port that the
6037 * Logical_Switch_Port is connected to, as
6039 const char *peer_name
= smap_get(
6040 &op
->od
->router_ports
[k
]->nbsp
->options
,
6046 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
6047 if (!peer
|| !peer
->nbrp
) {
6051 if (!find_lrp_member_ip(peer
, ip_s
)) {
6056 ds_put_format(&match
, "outport == %s && reg0 == %s",
6057 peer
->json_key
, ip_s
);
6060 ds_put_format(&actions
, "eth.dst = %s; next;", ea_s
);
6061 ovn_lflow_add(lflows
, peer
->od
,
6062 S_ROUTER_IN_ARP_RESOLVE
, 100,
6063 ds_cstr(&match
), ds_cstr(&actions
));
6067 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv6_addrs
; j
++) {
6068 const char *ip_s
= op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
;
6069 for (size_t k
= 0; k
< op
->od
->n_router_ports
; k
++) {
6070 /* Get the Logical_Router_Port that the
6071 * Logical_Switch_Port is connected to, as
6073 const char *peer_name
= smap_get(
6074 &op
->od
->router_ports
[k
]->nbsp
->options
,
6080 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
6081 if (!peer
|| !peer
->nbrp
) {
6085 if (!find_lrp_member_ip(peer
, ip_s
)) {
6090 ds_put_format(&match
, "outport == %s && xxreg0 == %s",
6091 peer
->json_key
, ip_s
);
6094 ds_put_format(&actions
, "eth.dst = %s; next;", ea_s
);
6095 ovn_lflow_add(lflows
, peer
->od
,
6096 S_ROUTER_IN_ARP_RESOLVE
, 100,
6097 ds_cstr(&match
), ds_cstr(&actions
));
6101 } else if (!strcmp(op
->nbsp
->type
, "router")) {
6102 /* This is a logical switch port that connects to a router. */
6104 /* The peer of this switch port is the router port for which
6105 * we need to add logical flows such that it can resolve
6106 * ARP entries for all the other router ports connected to
6107 * the switch in question. */
6109 const char *peer_name
= smap_get(&op
->nbsp
->options
,
6115 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
6116 if (!peer
|| !peer
->nbrp
) {
6120 for (size_t i
= 0; i
< op
->od
->n_router_ports
; i
++) {
6121 const char *router_port_name
= smap_get(
6122 &op
->od
->router_ports
[i
]->nbsp
->options
,
6124 struct ovn_port
*router_port
= ovn_port_find(ports
,
6126 if (!router_port
|| !router_port
->nbrp
) {
6130 /* Skip the router port under consideration. */
6131 if (router_port
== peer
) {
6135 if (router_port
->lrp_networks
.n_ipv4_addrs
) {
6137 ds_put_format(&match
, "outport == %s && reg0 == ",
6139 op_put_v4_networks(&match
, router_port
, false);
6142 ds_put_format(&actions
, "eth.dst = %s; next;",
6143 router_port
->lrp_networks
.ea_s
);
6144 ovn_lflow_add(lflows
, peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
6145 100, ds_cstr(&match
), ds_cstr(&actions
));
6148 if (router_port
->lrp_networks
.n_ipv6_addrs
) {
6150 ds_put_format(&match
, "outport == %s && xxreg0 == ",
6152 op_put_v6_networks(&match
, router_port
);
6155 ds_put_format(&actions
, "eth.dst = %s; next;",
6156 router_port
->lrp_networks
.ea_s
);
6157 ovn_lflow_add(lflows
, peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
6158 100, ds_cstr(&match
), ds_cstr(&actions
));
6164 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
6169 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 0, "ip4",
6170 "get_arp(outport, reg0); next;");
6172 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 0, "ip6",
6173 "get_nd(outport, xxreg0); next;");
6176 /* Logical router ingress table 9: Gateway redirect.
6178 * For traffic with outport equal to the l3dgw_port
6179 * on a distributed router, this table redirects a subset
6180 * of the traffic to the l3redirect_port which represents
6181 * the central instance of the l3dgw_port.
6183 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
6187 if (od
->l3dgw_port
&& od
->l3redirect_port
) {
6188 /* For traffic with outport == l3dgw_port, if the
6189 * packet did not match any higher priority redirect
6190 * rule, then the traffic is redirected to the central
6191 * instance of the l3dgw_port. */
6193 ds_put_format(&match
, "outport == %s",
6194 od
->l3dgw_port
->json_key
);
6196 ds_put_format(&actions
, "outport = %s; next;",
6197 od
->l3redirect_port
->json_key
);
6198 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 50,
6199 ds_cstr(&match
), ds_cstr(&actions
));
6201 /* If the Ethernet destination has not been resolved,
6202 * redirect to the central instance of the l3dgw_port.
6203 * Such traffic will be replaced by an ARP request or ND
6204 * Neighbor Solicitation in the ARP request ingress
6205 * table, before being redirected to the central instance.
6207 ds_put_format(&match
, " && eth.dst == 00:00:00:00:00:00");
6208 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 150,
6209 ds_cstr(&match
), ds_cstr(&actions
));
6212 /* Packets are allowed by default. */
6213 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 0, "1", "next;");
6216 /* Local router ingress table 10: ARP request.
6218 * In the common case where the Ethernet destination has been resolved,
6219 * this table outputs the packet (priority 0). Otherwise, it composes
6220 * and sends an ARP/IPv6 NA request (priority 100). */
6221 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
6226 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 100,
6227 "eth.dst == 00:00:00:00:00:00",
6229 "eth.dst = ff:ff:ff:ff:ff:ff; "
6232 "arp.op = 1; " /* ARP request */
6235 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 100,
6236 "eth.dst == 00:00:00:00:00:00",
6238 "nd.target = xxreg0; "
6241 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 0, "1", "output;");
6244 /* Logical router egress table 1: Delivery (priority 100).
6246 * Priority 100 rules deliver packets to enabled logical ports. */
6247 HMAP_FOR_EACH (op
, key_node
, ports
) {
6252 if (!lrport_is_enabled(op
->nbrp
)) {
6253 /* Drop packets to disabled logical ports (since logical flow
6254 * tables are default-drop). */
6259 /* No egress packets should be processed in the context of
6260 * a chassisredirect port. The chassisredirect port should
6261 * be replaced by the l3dgw port in the local output
6262 * pipeline stage before egress processing. */
6267 ds_put_format(&match
, "outport == %s", op
->json_key
);
6268 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_OUT_DELIVERY
, 100,
6269 ds_cstr(&match
), "output;");
6273 ds_destroy(&actions
);
6276 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
6277 * constructing their contents based on the OVN_NB database. */
6279 build_lflows(struct northd_context
*ctx
, struct hmap
*datapaths
,
6280 struct hmap
*ports
, struct hmap
*port_groups
)
6282 struct hmap lflows
= HMAP_INITIALIZER(&lflows
);
6283 struct hmap mcgroups
= HMAP_INITIALIZER(&mcgroups
);
6285 build_lswitch_flows(datapaths
, ports
, port_groups
, &lflows
, &mcgroups
);
6286 build_lrouter_flows(datapaths
, ports
, &lflows
);
6288 /* Push changes to the Logical_Flow table to database. */
6289 const struct sbrec_logical_flow
*sbflow
, *next_sbflow
;
6290 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow
, next_sbflow
, ctx
->ovnsb_idl
) {
6291 struct ovn_datapath
*od
6292 = ovn_datapath_from_sbrec(datapaths
, sbflow
->logical_datapath
);
6294 sbrec_logical_flow_delete(sbflow
);
6298 enum ovn_datapath_type dp_type
= od
->nbs
? DP_SWITCH
: DP_ROUTER
;
6299 enum ovn_pipeline pipeline
6300 = !strcmp(sbflow
->pipeline
, "ingress") ? P_IN
: P_OUT
;
6301 struct ovn_lflow
*lflow
= ovn_lflow_find(
6302 &lflows
, od
, ovn_stage_build(dp_type
, pipeline
, sbflow
->table_id
),
6303 sbflow
->priority
, sbflow
->match
, sbflow
->actions
, sbflow
->hash
);
6305 ovn_lflow_destroy(&lflows
, lflow
);
6307 sbrec_logical_flow_delete(sbflow
);
6310 struct ovn_lflow
*lflow
, *next_lflow
;
6311 HMAP_FOR_EACH_SAFE (lflow
, next_lflow
, hmap_node
, &lflows
) {
6312 const char *pipeline
= ovn_stage_get_pipeline_name(lflow
->stage
);
6313 uint8_t table
= ovn_stage_get_table(lflow
->stage
);
6315 sbflow
= sbrec_logical_flow_insert(ctx
->ovnsb_txn
);
6316 sbrec_logical_flow_set_logical_datapath(sbflow
, lflow
->od
->sb
);
6317 sbrec_logical_flow_set_pipeline(sbflow
, pipeline
);
6318 sbrec_logical_flow_set_table_id(sbflow
, table
);
6319 sbrec_logical_flow_set_priority(sbflow
, lflow
->priority
);
6320 sbrec_logical_flow_set_match(sbflow
, lflow
->match
);
6321 sbrec_logical_flow_set_actions(sbflow
, lflow
->actions
);
6323 /* Trim the source locator lflow->where, which looks something like
6324 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
6325 * last slash, e.g. "ovn-northd.c:1234". */
6326 const char *slash
= strrchr(lflow
->where
, '/');
6328 const char *backslash
= strrchr(lflow
->where
, '\\');
6329 if (!slash
|| backslash
> slash
) {
6333 const char *where
= slash
? slash
+ 1 : lflow
->where
;
6335 struct smap ids
= SMAP_INITIALIZER(&ids
);
6336 smap_add(&ids
, "stage-name", ovn_stage_to_str(lflow
->stage
));
6337 smap_add(&ids
, "source", where
);
6338 if (lflow
->stage_hint
) {
6339 smap_add(&ids
, "stage-hint", lflow
->stage_hint
);
6341 sbrec_logical_flow_set_external_ids(sbflow
, &ids
);
6344 ovn_lflow_destroy(&lflows
, lflow
);
6346 hmap_destroy(&lflows
);
6348 /* Push changes to the Multicast_Group table to database. */
6349 const struct sbrec_multicast_group
*sbmc
, *next_sbmc
;
6350 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc
, next_sbmc
, ctx
->ovnsb_idl
) {
6351 struct ovn_datapath
*od
= ovn_datapath_from_sbrec(datapaths
,
6354 sbrec_multicast_group_delete(sbmc
);
6358 struct multicast_group group
= { .name
= sbmc
->name
,
6359 .key
= sbmc
->tunnel_key
};
6360 struct ovn_multicast
*mc
= ovn_multicast_find(&mcgroups
, od
, &group
);
6362 ovn_multicast_update_sbrec(mc
, sbmc
);
6363 ovn_multicast_destroy(&mcgroups
, mc
);
6365 sbrec_multicast_group_delete(sbmc
);
6368 struct ovn_multicast
*mc
, *next_mc
;
6369 HMAP_FOR_EACH_SAFE (mc
, next_mc
, hmap_node
, &mcgroups
) {
6370 sbmc
= sbrec_multicast_group_insert(ctx
->ovnsb_txn
);
6371 sbrec_multicast_group_set_datapath(sbmc
, mc
->datapath
->sb
);
6372 sbrec_multicast_group_set_name(sbmc
, mc
->group
->name
);
6373 sbrec_multicast_group_set_tunnel_key(sbmc
, mc
->group
->key
);
6374 ovn_multicast_update_sbrec(mc
, sbmc
);
6375 ovn_multicast_destroy(&mcgroups
, mc
);
6377 hmap_destroy(&mcgroups
);
6381 sync_address_set(struct northd_context
*ctx
, const char *name
,
6382 const char **addrs
, size_t n_addrs
,
6383 struct shash
*sb_address_sets
)
6385 const struct sbrec_address_set
*sb_address_set
;
6386 sb_address_set
= shash_find_and_delete(sb_address_sets
,
6388 if (!sb_address_set
) {
6389 sb_address_set
= sbrec_address_set_insert(ctx
->ovnsb_txn
);
6390 sbrec_address_set_set_name(sb_address_set
, name
);
6393 sbrec_address_set_set_addresses(sb_address_set
,
6397 /* OVN_Southbound Address_Set table contains same records as in north
6398 * bound, plus the records generated from Port_Group table in north bound.
6400 * There are 2 records generated from each port group, one for IPv4, and
6401 * one for IPv6, named in the format: <port group name>_ip4 and
6402 * <port group name>_ip6 respectively. MAC addresses are ignored.
6404 * We always update OVN_Southbound to match the Address_Set and Port_Group
6405 * in OVN_Northbound, so that the address sets used in Logical_Flows in
6406 * OVN_Southbound is checked against the proper set.*/
static void
sync_address_sets(struct northd_context *ctx)
{
    /* Snapshot every SB Address_Set keyed by name; sync_address_set() pops
     * the ones that still exist in the NB view, so whatever remains at the
     * end is stale and must be deleted. */
    struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);

    const struct sbrec_address_set *sb_address_set;
    SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
        shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
    }

    /* sync port group generated address sets first */
    const struct nbrec_port_group *nb_port_group;
    NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
        /* Grow-on-demand buffers for the collected address strings; the
         * capacity counters double independently of the element counters. */
        char **ipv4_addrs = xcalloc(1, sizeof *ipv4_addrs);
        size_t n_ipv4_addrs = 0;
        size_t n_ipv4_addrs_buf = 1;
        char **ipv6_addrs = xcalloc(1, sizeof *ipv6_addrs);
        size_t n_ipv6_addrs = 0;
        size_t n_ipv6_addrs_buf = 1;
        for (size_t i = 0; i < nb_port_group->n_ports; i++) {
            for (size_t j = 0; j < nb_port_group->ports[i]->n_addresses; j++) {
                /* Parse "MAC [IP [IP...]]" strings; MACs are intentionally
                 * dropped, only the parsed IPv4/IPv6 addresses are kept. */
                struct lport_addresses laddrs;
                extract_lsp_addresses(nb_port_group->ports[i]->addresses[j],
                                      &laddrs);
                while (n_ipv4_addrs_buf < n_ipv4_addrs + laddrs.n_ipv4_addrs) {
                    n_ipv4_addrs_buf *= 2;
                    ipv4_addrs = xrealloc(ipv4_addrs,
                                          n_ipv4_addrs_buf
                                          * sizeof *ipv4_addrs);
                }
                for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
                    ipv4_addrs[n_ipv4_addrs++] =
                        xstrdup(laddrs.ipv4_addrs[k].addr_s);
                }
                while (n_ipv6_addrs_buf < n_ipv6_addrs + laddrs.n_ipv6_addrs) {
                    n_ipv6_addrs_buf *= 2;
                    ipv6_addrs = xrealloc(ipv6_addrs,
                                          n_ipv6_addrs_buf
                                          * sizeof *ipv6_addrs);
                }
                for (size_t k = 0; k < laddrs.n_ipv6_addrs; k++) {
                    ipv6_addrs[n_ipv6_addrs++] =
                        xstrdup(laddrs.ipv6_addrs[k].addr_s);
                }
                destroy_lport_addresses(&laddrs);
            }
        }
        /* Derived set names: "<port group name>_ip4" / "_ip6". */
        char *ipv4_addrs_name = xasprintf("%s_ip4", nb_port_group->name);
        char *ipv6_addrs_name = xasprintf("%s_ip6", nb_port_group->name);
        sync_address_set(ctx, ipv4_addrs_name, (const char **)ipv4_addrs,
                         n_ipv4_addrs, &sb_address_sets);
        sync_address_set(ctx, ipv6_addrs_name, (const char **)ipv6_addrs,
                         n_ipv6_addrs, &sb_address_sets);
        free(ipv4_addrs_name);
        free(ipv6_addrs_name);
        for (size_t i = 0; i < n_ipv4_addrs; i++) {
            free(ipv4_addrs[i]);
        }
        free(ipv4_addrs);
        for (size_t i = 0; i < n_ipv6_addrs; i++) {
            free(ipv6_addrs[i]);
        }
        free(ipv6_addrs);
    }

    /* sync user defined address sets, which may overwrite port group
     * generated address sets if same name is used */
    const struct nbrec_address_set *nb_address_set;
    NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
        sync_address_set(ctx, nb_address_set->name,
            /* "char **" is not compatible with "const char **" */
            (const char **)nb_address_set->addresses,
            nb_address_set->n_addresses, &sb_address_sets);
    }

    /* Anything left in the shash no longer exists in the NB view: prune. */
    struct shash_node *node, *next;
    SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
        sbrec_address_set_delete(node->data);
        shash_delete(&sb_address_sets, node);
    }
    shash_destroy(&sb_address_sets);
}
/* Each port group in Port_Group table in OVN_Northbound has a corresponding
 * entry in Port_Group table in OVN_Southbound.  In OVN_Northbound the entries
 * contain lport uuids, while in OVN_Southbound we store the lport names.
 */
static void
sync_port_groups(struct northd_context *ctx)
{
    /* Snapshot existing SB Port_Groups by name; entries still present in
     * the shash at the end are stale and get deleted. */
    struct shash sb_port_groups = SHASH_INITIALIZER(&sb_port_groups);

    const struct sbrec_port_group *sb_port_group;
    SBREC_PORT_GROUP_FOR_EACH (sb_port_group, ctx->ovnsb_idl) {
        shash_add(&sb_port_groups, sb_port_group->name, sb_port_group);
    }

    const struct nbrec_port_group *nb_port_group;
    NBREC_PORT_GROUP_FOR_EACH (nb_port_group, ctx->ovnnb_idl) {
        sb_port_group = shash_find_and_delete(&sb_port_groups,
                                              nb_port_group->name);
        if (!sb_port_group) {
            /* No SB counterpart yet: create it. */
            sb_port_group = sbrec_port_group_insert(ctx->ovnsb_txn);
            sbrec_port_group_set_name(sb_port_group, nb_port_group->name);
        }

        /* NB references ports as row pointers; SB stores lport names. */
        const char **nb_port_names = xcalloc(nb_port_group->n_ports,
                                             sizeof *nb_port_names);
        int i;
        for (i = 0; i < nb_port_group->n_ports; i++) {
            nb_port_names[i] = nb_port_group->ports[i]->name;
        }
        sbrec_port_group_set_ports(sb_port_group,
                                   nb_port_names,
                                   nb_port_group->n_ports);
        free(nb_port_names);
    }

    /* Prune SB Port_Groups with no NB counterpart. */
    struct shash_node *node, *next;
    SHASH_FOR_EACH_SAFE (node, next, &sb_port_groups) {
        sbrec_port_group_delete(node->data);
        shash_delete(&sb_port_groups, node);
    }
    shash_destroy(&sb_port_groups);
}
/*
 * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
 * and Southbound db.
 */
struct dns_info {
    struct hmap_node hmap_node;   /* Node in dns_map, hashed on NB row uuid. */
    const struct nbrec_dns *nb_dns;  /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns;  /* DNS record in the Southbound db. */

    /* Datapaths to which the DNS entry is associated with. */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;                 /* Number of entries in 'sbs'. */
};
/* Looks up the dns_info whose NB DNS record has row uuid 'uuid' in
 * 'dns_map'.  Returns NULL if no entry matches. */
static inline struct dns_info *
get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid)
{
    struct dns_info *dns_info;
    size_t hash = uuid_hash(uuid);
    HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) {
        /* Hash collisions are possible: confirm the full uuid matches. */
        if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) {
            return dns_info;
        }
    }

    return NULL;
}
6560 sync_dns_entries(struct northd_context
*ctx
, struct hmap
*datapaths
)
6562 struct hmap dns_map
= HMAP_INITIALIZER(&dns_map
);
6563 struct ovn_datapath
*od
;
6564 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
6565 if (!od
->nbs
|| !od
->nbs
->n_dns_records
) {
6569 for (size_t i
= 0; i
< od
->nbs
->n_dns_records
; i
++) {
6570 struct dns_info
*dns_info
= get_dns_info_from_hmap(
6571 &dns_map
, &od
->nbs
->dns_records
[i
]->header_
.uuid
);
6573 size_t hash
= uuid_hash(
6574 &od
->nbs
->dns_records
[i
]->header_
.uuid
);
6575 dns_info
= xzalloc(sizeof *dns_info
);;
6576 dns_info
->nb_dns
= od
->nbs
->dns_records
[i
];
6577 hmap_insert(&dns_map
, &dns_info
->hmap_node
, hash
);
6581 dns_info
->sbs
= xrealloc(dns_info
->sbs
,
6582 dns_info
->n_sbs
* sizeof *dns_info
->sbs
);
6583 dns_info
->sbs
[dns_info
->n_sbs
- 1] = od
->sb
;
6587 const struct sbrec_dns
*sbrec_dns
, *next
;
6588 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns
, next
, ctx
->ovnsb_idl
) {
6589 const char *nb_dns_uuid
= smap_get(&sbrec_dns
->external_ids
, "dns_id");
6590 struct uuid dns_uuid
;
6591 if (!nb_dns_uuid
|| !uuid_from_string(&dns_uuid
, nb_dns_uuid
)) {
6592 sbrec_dns_delete(sbrec_dns
);
6596 struct dns_info
*dns_info
=
6597 get_dns_info_from_hmap(&dns_map
, &dns_uuid
);
6599 dns_info
->sb_dns
= sbrec_dns
;
6601 sbrec_dns_delete(sbrec_dns
);
6605 struct dns_info
*dns_info
;
6606 HMAP_FOR_EACH_POP (dns_info
, hmap_node
, &dns_map
) {
6607 if (!dns_info
->sb_dns
) {
6608 sbrec_dns
= sbrec_dns_insert(ctx
->ovnsb_txn
);
6609 dns_info
->sb_dns
= sbrec_dns
;
6610 char *dns_id
= xasprintf(
6611 UUID_FMT
, UUID_ARGS(&dns_info
->nb_dns
->header_
.uuid
));
6612 const struct smap external_ids
=
6613 SMAP_CONST1(&external_ids
, "dns_id", dns_id
);
6614 sbrec_dns_set_external_ids(sbrec_dns
, &external_ids
);
6618 /* Set the datapaths and records. If nothing has changed, then
6619 * this will be a no-op.
6621 sbrec_dns_set_datapaths(
6623 (struct sbrec_datapath_binding
**)dns_info
->sbs
,
6625 sbrec_dns_set_records(dns_info
->sb_dns
, &dns_info
->nb_dns
->records
);
6626 free(dns_info
->sbs
);
6629 hmap_destroy(&dns_map
);
/* One northbound processing pass: rebuilds datapaths, ports, IPAM, port
 * groups, and logical flows from the NB view, syncs the derived SB tables,
 * then mirrors nb_cfg into the SB Global table.  Requires transactions open
 * on both databases. */
static void
ovnnb_db_run(struct northd_context *ctx,
             struct ovsdb_idl_index *sbrec_chassis_by_name,
             struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }
    struct hmap datapaths, ports, port_groups;
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, sbrec_chassis_by_name, &datapaths, &ports);
    build_ipam(&datapaths, &ports);
    build_port_group_lswitches(ctx, &port_groups, &ports);
    build_lflows(ctx, &datapaths, &ports, &port_groups);

    sync_address_sets(ctx);
    sync_port_groups(ctx);
    sync_dns_entries(ctx, &datapaths);

    /* Tear down the per-pass structures; they are rebuilt from scratch on
     * the next pass. */
    struct ovn_port_group *pg, *next_pg;
    HMAP_FOR_EACH_SAFE (pg, next_pg, key_node, &port_groups) {
        ovn_port_group_destroy(&port_groups, pg);
    }
    hmap_destroy(&port_groups);

    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    /* Release MAC addresses allocated by IPAM during this pass. */
    cleanup_macam(&macam);
}
6688 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
6689 * this column is not empty, it means we need to set the corresponding logical
6690 * port as 'up' in the northbound DB. */
static void
update_logical_port_status(struct northd_context *ctx)
{
    struct hmap lports_hmap;
    const struct sbrec_port_binding *sb;
    const struct nbrec_logical_switch_port *nbsp;

    /* Hash of NB logical switch ports keyed on port name, so each SB
     * Port_Binding can be matched without an O(n^2) scan. */
    struct lport_hash_node {
        struct hmap_node node;
        const struct nbrec_logical_switch_port *nbsp;
    } *hash_node;

    hmap_init(&lports_hmap);

    NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
        hash_node = xzalloc(sizeof *hash_node);
        hash_node->nbsp = nbsp;
        hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
    }

    SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
        nbsp = NULL;
        HMAP_FOR_EACH_WITH_HASH(hash_node, node,
                                hash_string(sb->logical_port, 0),
                                &lports_hmap) {
            /* Same hash bucket may hold different names: compare fully. */
            if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
                nbsp = hash_node->nbsp;
                break;
            }
        }

        if (!nbsp) {
            /* The logical port doesn't exist for this port binding.  This can
             * happen under normal circumstances when ovn-northd hasn't gotten
             * around to pruning the Port_Binding yet. */
            continue;
        }

        /* "router" ports are considered up even without a chassis binding. */
        bool up = (sb->chassis || !strcmp(nbsp->type, "router"));
        if (!nbsp->up || *nbsp->up != up) {
            nbrec_logical_switch_port_set_up(nbsp, &up, 1);
        }
    }

    HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
        free(hash_node);
    }
    hmap_destroy(&lports_hmap);
}
/* DHCPv4 options that ovn-northd advertises in the SB DHCP_Options table
 * (macros declared in ovn-l7.h).  check_and_add_supported_dhcp_opts_to_sb_db()
 * keeps the SB table in sync with this list. */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
/* DHCPv6 options advertised in the SB DHCPv6_Options table; synced by
 * check_and_add_supported_dhcpv6_opts_to_sb_db(). */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
6776 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context
*ctx
)
6778 struct hmap dhcp_opts_to_add
= HMAP_INITIALIZER(&dhcp_opts_to_add
);
6779 for (size_t i
= 0; (i
< sizeof(supported_dhcp_opts
) /
6780 sizeof(supported_dhcp_opts
[0])); i
++) {
6781 hmap_insert(&dhcp_opts_to_add
, &supported_dhcp_opts
[i
].hmap_node
,
6782 dhcp_opt_hash(supported_dhcp_opts
[i
].name
));
6785 const struct sbrec_dhcp_options
*opt_row
, *opt_row_next
;
6786 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row
, opt_row_next
, ctx
->ovnsb_idl
) {
6787 struct gen_opts_map
*dhcp_opt
=
6788 dhcp_opts_find(&dhcp_opts_to_add
, opt_row
->name
);
6790 hmap_remove(&dhcp_opts_to_add
, &dhcp_opt
->hmap_node
);
6792 sbrec_dhcp_options_delete(opt_row
);
6796 struct gen_opts_map
*opt
;
6797 HMAP_FOR_EACH (opt
, hmap_node
, &dhcp_opts_to_add
) {
6798 struct sbrec_dhcp_options
*sbrec_dhcp_option
=
6799 sbrec_dhcp_options_insert(ctx
->ovnsb_txn
);
6800 sbrec_dhcp_options_set_name(sbrec_dhcp_option
, opt
->name
);
6801 sbrec_dhcp_options_set_code(sbrec_dhcp_option
, opt
->code
);
6802 sbrec_dhcp_options_set_type(sbrec_dhcp_option
, opt
->type
);
6805 hmap_destroy(&dhcp_opts_to_add
);
6809 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context
*ctx
)
6811 struct hmap dhcpv6_opts_to_add
= HMAP_INITIALIZER(&dhcpv6_opts_to_add
);
6812 for (size_t i
= 0; (i
< sizeof(supported_dhcpv6_opts
) /
6813 sizeof(supported_dhcpv6_opts
[0])); i
++) {
6814 hmap_insert(&dhcpv6_opts_to_add
, &supported_dhcpv6_opts
[i
].hmap_node
,
6815 dhcp_opt_hash(supported_dhcpv6_opts
[i
].name
));
6818 const struct sbrec_dhcpv6_options
*opt_row
, *opt_row_next
;
6819 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row
, opt_row_next
, ctx
->ovnsb_idl
) {
6820 struct gen_opts_map
*dhcp_opt
=
6821 dhcp_opts_find(&dhcpv6_opts_to_add
, opt_row
->name
);
6823 hmap_remove(&dhcpv6_opts_to_add
, &dhcp_opt
->hmap_node
);
6825 sbrec_dhcpv6_options_delete(opt_row
);
6829 struct gen_opts_map
*opt
;
6830 HMAP_FOR_EACH(opt
, hmap_node
, &dhcpv6_opts_to_add
) {
6831 struct sbrec_dhcpv6_options
*sbrec_dhcpv6_option
=
6832 sbrec_dhcpv6_options_insert(ctx
->ovnsb_txn
);
6833 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option
, opt
->name
);
6834 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option
, opt
->code
);
6835 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option
, opt
->type
);
6838 hmap_destroy(&dhcpv6_opts_to_add
);
/* Per-SB-table RBAC column lists consumed by the rbac_perm_cfg table below:
 * '*_auth' names the columns an ovn-controller must match to prove identity,
 * '*_update' names the columns it is then allowed to modify. */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

/* Empty auth string: no authorization column restriction for this table. */
static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};
/* Expected RBAC permission state for each SB table, terminated by a NULL
 * 'table' sentinel.  'row' caches the matching SB RBAC_Permission row found
 * during validation (NULL if absent); check_and_update_rbac() resets and
 * refills it on every pass. */
static struct rbac_perm_cfg {
    const char *table;      /* SB table this permission covers. */
    const char **auth;      /* Authorization columns. */
    int n_auth;
    bool insdel;            /* May the client insert/delete rows? */
    const char **update;    /* Columns the client may update. */
    int n_update;
    const struct sbrec_rbac_permission *row;
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        /* Sentinel: marks the end of the table. */
        .table = NULL,
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
/* Returns true if SB RBAC_Permission row 'perm' exactly matches the expected
 * configuration in rbac_perm_cfg[] (table, counts, insert/delete flag, and
 * set-equality of the authorization and update column lists); caches the row
 * in the matching config entry on success.  Returns false otherwise, in
 * which case the caller deletes the row. */
static bool
ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm)
{
    struct rbac_perm_cfg *pcfg;
    int i, j, n_found;

    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        if (!strcmp(perm->table, pcfg->table)) {
            break;
        }
    }
    if (!pcfg->table) {
        /* 'perm' covers a table we do not manage: reject. */
        return false;
    }
    if (perm->n_authorization != pcfg->n_auth ||
        perm->n_update != pcfg->n_update) {
        return false;
    }
    if (perm->insert_delete != pcfg->insdel) {
        return false;
    }
    /* verify perm->authorization vs. pcfg->auth */
    n_found = 0;
    for (i = 0; i < pcfg->n_auth; i++) {
        for (j = 0; j < perm->n_authorization; j++) {
            if (!strcmp(pcfg->auth[i], perm->authorization[j])) {
                n_found++;
                break;
            }
        }
    }
    if (n_found != pcfg->n_auth) {
        return false;
    }

    /* verify perm->update vs. pcfg->update */
    n_found = 0;
    for (i = 0; i < pcfg->n_update; i++) {
        for (j = 0; j < perm->n_update; j++) {
            if (!strcmp(pcfg->update[i], perm->update[j])) {
                n_found++;
                break;
            }
        }
    }
    if (n_found != pcfg->n_update) {
        return false;
    }

    /* Success, db state matches expected state */
    pcfg->row = perm;
    return true;
}
/* Inserts a new SB RBAC_Permission row built from config entry 'pcfg' and
 * links it into 'rbac_role' under the table's name. */
static void
ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg,
                     struct northd_context *ctx,
                     const struct sbrec_rbac_role *rbac_role)
{
    struct sbrec_rbac_permission *rbac_perm;

    rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn);
    sbrec_rbac_permission_set_table(rbac_perm, pcfg->table);
    sbrec_rbac_permission_set_authorization(rbac_perm,
                                            pcfg->auth,
                                            pcfg->n_auth);
    sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel);
    sbrec_rbac_permission_set_update(rbac_perm,
                                     pcfg->update,
                                     pcfg->n_update);
    sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table,
                                              rbac_perm);
}
/* Drives the SB RBAC tables to the expected state: deletes invalid
 * RBAC_Permission rows, keeps only the "ovn-controller" RBAC_Role (creating
 * it if missing), and creates any permission rows that validation did not
 * find. */
static void
check_and_update_rbac(struct northd_context *ctx)
{
    const struct sbrec_rbac_role *rbac_role = NULL;
    const struct sbrec_rbac_permission *perm_row, *perm_next;
    const struct sbrec_rbac_role *role_row, *role_row_next;
    struct rbac_perm_cfg *pcfg;

    /* Clear cached rows from any previous pass; ovn_rbac_validate_perm()
     * refills 'row' for permissions that check out. */
    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        pcfg->row = NULL;
    }

    SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) {
        if (!ovn_rbac_validate_perm(perm_row)) {
            sbrec_rbac_permission_delete(perm_row);
        }
    }
    SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) {
        if (strcmp(role_row->name, "ovn-controller")) {
            /* Only the "ovn-controller" role is managed; drop others. */
            sbrec_rbac_role_delete(role_row);
        } else {
            rbac_role = role_row;
        }
    }

    if (!rbac_role) {
        rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn);
        sbrec_rbac_role_set_name(rbac_role, "ovn-controller");
    }

    /* Recreate every permission that validation did not cache. */
    for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) {
        if (!pcfg->row) {
            ovn_rbac_create_perm(pcfg, ctx, rbac_role);
        }
    }
}
/* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
static void
update_northbound_cfg(struct northd_context *ctx,
                      struct ovsdb_idl_loop *sb_loop)
{
    /* Update northbound sb_cfg if appropriate. */
    const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl);
    int64_t sb_cfg = sb_loop->cur_cfg;
    if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) {
        nbrec_nb_global_set_sb_cfg(nbg, sb_cfg);
    }

    /* Update northbound hv_cfg if appropriate. */
    if (nbg) {
        /* Find minimum nb_cfg among all chassis. */
        const struct sbrec_chassis *chassis;
        int64_t hv_cfg = nbg->nb_cfg;
        SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) {
            if (chassis->nb_cfg < hv_cfg) {
                hv_cfg = chassis->nb_cfg;
            }
        }

        /* Update hv_cfg. */
        if (nbg->hv_cfg != hv_cfg) {
            nbrec_nb_global_set_hv_cfg(nbg, hv_cfg);
        }
    }
}
/* Handle a fairly small set of changes in the southbound database. */
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    /* Skip until the NB transaction is open and the SB contents have been
     * received at least once. */
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}
/* Parses command-line options into the file-scope ovnnb_db / ovnsb_db /
 * unixctl_path globals.  '--help', '--options' and '--version' print and
 * exit.  Unset database targets fall back to the compiled-in defaults. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"unixctl", required_argument, NULL, 'u'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Shared daemon/vlog/SSL options are handled by library macros. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'u':
            unixctl_path = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
/* Monitors 'column' in the IDL but suppresses change alerts for it:
 * ovn-northd writes these columns itself and does not need to be woken up
 * by its own updates. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    /* Process setup: signals, proctitle, options, daemonization. */
    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(unixctl_path, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg/hv_cfg are written by this process; don't alert on them. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    /* Register exactly the SB tables/columns ovn-northd reads or writes;
     * columns it only writes go through add_column_noalert(). */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* 'chassis' and gateway-chassis columns are written by controllers,
     * so alerts stay enabled for them. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_port_binding_col_gateway_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_priority);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_external_ids);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_options);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_external_ids);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_group);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_table);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_authorization);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_insert_delete);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);

    struct ovsdb_idl_index *sbrec_chassis_by_name
        = chassis_index_create(ovnsb_idl_loop.idl);

    /* Ensure that only a single ovn-northd is active in the deployment by
     * acquiring a lock called "ovn_northd" on the southbound database
     * and then only performing DB transactions if the lock is held. */
    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
    bool had_lock = false;

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        /* Log active/standby transitions driven by the SB lock. */
        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock acquired. "
                      "This ovn-northd instance is now active.");
            had_lock = true;
        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock lost. "
                      "This ovn-northd instance is now on standby.");
            had_lock = false;
        }

        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            ovnnb_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
            ovnsb_db_run(&ctx, &ovnsb_idl_loop);
            if (ctx.ovnsb_txn) {
                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
                check_and_update_rbac(&ctx);
            }
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7342 ovn_northd_exit(struct unixctl_conn
*conn
, int argc OVS_UNUSED
,
7343 const char *argv
[] OVS_UNUSED
, void *exiting_
)
7345 bool *exiting
= exiting_
;
7348 unixctl_command_reply(conn
, NULL
);