2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
22 #include "command-line.h"
25 #include "openvswitch/dynamic-string.h"
26 #include "fatal-signal.h"
28 #include "openvswitch/hmap.h"
29 #include "openvswitch/json.h"
31 #include "ovn/lib/chassis-index.h"
32 #include "ovn/lib/logical-fields.h"
33 #include "ovn/lib/ovn-l7.h"
34 #include "ovn/lib/ovn-nb-idl.h"
35 #include "ovn/lib/ovn-sb-idl.h"
36 #include "ovn/lib/ovn-util.h"
37 #include "ovn/actions.h"
39 #include "openvswitch/poll-loop.h"
43 #include "stream-ssl.h"
47 #include "openvswitch/vlog.h"
49 VLOG_DEFINE_THIS_MODULE(ovn_northd
);
51 static unixctl_cb_func ovn_northd_exit
;
53 struct northd_context
{
54 struct ovsdb_idl
*ovnnb_idl
;
55 struct ovsdb_idl
*ovnsb_idl
;
56 struct ovsdb_idl_txn
*ovnnb_txn
;
57 struct ovsdb_idl_txn
*ovnsb_txn
;
60 static const char *ovnnb_db
;
61 static const char *ovnsb_db
;
63 #define MAC_ADDR_PREFIX 0x0A0000000000ULL
64 #define MAC_ADDR_SPACE 0xffffff
66 /* MAC address management (macam) table of "struct eth_addr"s, that holds the
67 * MAC addresses allocated by the OVN ipam module. */
68 static struct hmap macam
= HMAP_INITIALIZER(&macam
);
70 #define MAX_OVN_TAGS 4096
72 /* Pipeline stages. */
74 /* The two pipelines in an OVN logical flow table. */
76 P_IN
, /* Ingress pipeline. */
77 P_OUT
/* Egress pipeline. */
80 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
81 enum ovn_datapath_type
{
82 DP_SWITCH
, /* OVN logical switch. */
83 DP_ROUTER
/* OVN logical router. */
86 /* Returns an "enum ovn_stage" built from the arguments.
88 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
89 * functions can't be used in enums or switch cases.) */
90 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
91 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
93 /* A stage within an OVN logical switch or router.
95 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
96 * or router, whether the stage is part of the ingress or egress pipeline, and
97 * the table within that pipeline. The first three components are combined to
98 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
99 * S_ROUTER_OUT_DELIVERY. */
101 #define PIPELINE_STAGES \
102 /* Logical switch ingress stages. */ \
103 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
104 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
105 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
106 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
107 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
108 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
109 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
110 PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \
111 PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \
112 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \
113 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \
114 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \
115 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \
116 PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 13, "ls_in_dns_lookup") \
117 PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 14, "ls_in_dns_response") \
118 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 15, "ls_in_l2_lkup") \
120 /* Logical switch egress stages. */ \
121 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
122 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
123 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
124 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
125 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
126 PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \
127 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \
128 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \
129 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \
131 /* Logical router ingress stages. */ \
132 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
133 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
134 PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \
135 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \
136 PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \
137 PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 5, "lr_in_nd_ra_options") \
138 PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \
139 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \
140 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 8, "lr_in_arp_resolve") \
141 PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 9, "lr_in_gw_redirect") \
142 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 10, "lr_in_arp_request") \
144 /* Logical router egress stages. */ \
145 PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \
146 PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \
147 PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \
148 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery")
150 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
151 S_##DP_TYPE##_##PIPELINE##_##STAGE \
152 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
154 #undef PIPELINE_STAGE
157 /* Due to various hard-coded priorities need to implement ACLs, the
158 * northbound database supports a smaller range of ACL priorities than
159 * are available to logical flows. This value is added to an ACL
160 * priority to determine the ACL's logical flow priority. */
161 #define OVN_ACL_PRI_OFFSET 1000
163 /* Register definitions specific to switches. */
164 #define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
165 #define REGBIT_CONNTRACK_COMMIT "reg0[1]"
166 #define REGBIT_CONNTRACK_NAT "reg0[2]"
167 #define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
168 #define REGBIT_DNS_LOOKUP_RESULT "reg0[4]"
169 #define REGBIT_ND_RA_OPTS_RESULT "reg0[5]"
171 /* Register definitions for switches and routers. */
172 #define REGBIT_NAT_REDIRECT "reg9[0]"
173 /* Indicate that this packet has been recirculated using egress
174 * loopback. This allows certain checks to be bypassed, such as a
175 * logical router dropping packets with source IP address equals
176 * one of the logical router's own IP addresses. */
177 #define REGBIT_EGRESS_LOOPBACK "reg9[1]"
179 /* Returns an "enum ovn_stage" built from the arguments. */
180 static enum ovn_stage
181 ovn_stage_build(enum ovn_datapath_type dp_type
, enum ovn_pipeline pipeline
,
184 return OVN_STAGE_BUILD(dp_type
, pipeline
, table
);
187 /* Returns the pipeline to which 'stage' belongs. */
188 static enum ovn_pipeline
189 ovn_stage_get_pipeline(enum ovn_stage stage
)
191 return (stage
>> 8) & 1;
194 /* Returns the table to which 'stage' belongs. */
196 ovn_stage_get_table(enum ovn_stage stage
)
201 /* Returns a string name for 'stage'. */
203 ovn_stage_to_str(enum ovn_stage stage
)
206 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
207 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
209 #undef PIPELINE_STAGE
210 default: return "<unknown>";
214 /* Returns the type of the datapath to which a flow with the given 'stage' may
216 static enum ovn_datapath_type
217 ovn_stage_to_datapath_type(enum ovn_stage stage
)
220 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
221 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
223 #undef PIPELINE_STAGE
224 default: OVS_NOT_REACHED();
232 %s: OVN northbound management daemon\n\
233 usage: %s [OPTIONS]\n\
236 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
238 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
240 -h, --help display this help message\n\
241 -o, --options list available options\n\
242 -V, --version display version information\n\
243 ", program_name
, program_name
, default_nb_db(), default_sb_db());
246 stream_usage("database", true, true, false);
250 struct hmap_node hmap_node
;
255 destroy_tnlids(struct hmap
*tnlids
)
257 struct tnlid_node
*node
;
258 HMAP_FOR_EACH_POP (node
, hmap_node
, tnlids
) {
261 hmap_destroy(tnlids
);
265 add_tnlid(struct hmap
*set
, uint32_t tnlid
)
267 struct tnlid_node
*node
= xmalloc(sizeof *node
);
268 hmap_insert(set
, &node
->hmap_node
, hash_int(tnlid
, 0));
273 tnlid_in_use(const struct hmap
*set
, uint32_t tnlid
)
275 const struct tnlid_node
*node
;
276 HMAP_FOR_EACH_IN_BUCKET (node
, hmap_node
, hash_int(tnlid
, 0), set
) {
277 if (node
->tnlid
== tnlid
) {
285 allocate_tnlid(struct hmap
*set
, const char *name
, uint32_t max
,
288 for (uint32_t tnlid
= *hint
+ 1; tnlid
!= *hint
;
289 tnlid
= tnlid
+ 1 <= max
? tnlid
+ 1 : 1) {
290 if (!tnlid_in_use(set
, tnlid
)) {
291 add_tnlid(set
, tnlid
);
297 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
298 VLOG_WARN_RL(&rl
, "all %s tunnel ids exhausted", name
);
302 struct ovn_chassis_qdisc_queues
{
303 struct hmap_node key_node
;
305 struct uuid chassis_uuid
;
309 destroy_chassis_queues(struct hmap
*set
)
311 struct ovn_chassis_qdisc_queues
*node
;
312 HMAP_FOR_EACH_POP (node
, key_node
, set
) {
319 add_chassis_queue(struct hmap
*set
, struct uuid
*chassis_uuid
,
322 struct ovn_chassis_qdisc_queues
*node
= xmalloc(sizeof *node
);
323 node
->queue_id
= queue_id
;
324 memcpy(&node
->chassis_uuid
, chassis_uuid
, sizeof node
->chassis_uuid
);
325 hmap_insert(set
, &node
->key_node
, uuid_hash(chassis_uuid
));
329 chassis_queueid_in_use(const struct hmap
*set
, struct uuid
*chassis_uuid
,
332 const struct ovn_chassis_qdisc_queues
*node
;
333 HMAP_FOR_EACH_WITH_HASH (node
, key_node
, uuid_hash(chassis_uuid
), set
) {
334 if (uuid_equals(chassis_uuid
, &node
->chassis_uuid
)
335 && node
->queue_id
== queue_id
) {
343 allocate_chassis_queueid(struct hmap
*set
, struct sbrec_chassis
*chassis
)
345 for (uint32_t queue_id
= QDISC_MIN_QUEUE_ID
+ 1;
346 queue_id
<= QDISC_MAX_QUEUE_ID
;
348 if (!chassis_queueid_in_use(set
, &chassis
->header_
.uuid
, queue_id
)) {
349 add_chassis_queue(set
, &chassis
->header_
.uuid
, queue_id
);
354 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
355 VLOG_WARN_RL(&rl
, "all %s queue ids exhausted", chassis
->name
);
360 free_chassis_queueid(struct hmap
*set
, struct sbrec_chassis
*chassis
,
363 struct ovn_chassis_qdisc_queues
*node
;
364 HMAP_FOR_EACH_WITH_HASH (node
, key_node
,
365 uuid_hash(&chassis
->header_
.uuid
),
367 if (uuid_equals(&chassis
->header_
.uuid
, &node
->chassis_uuid
)
368 && node
->queue_id
== queue_id
) {
369 hmap_remove(set
, &node
->key_node
);
/* Returns true if 'opts' carries either of the QoS knobs this file cares
 * about ("qos_max_rate" or "qos_burst"). */
static bool
port_has_qos_params(const struct smap *opts)
{
    return (smap_get(opts, "qos_max_rate") ||
            smap_get(opts, "qos_burst"));
}
386 unsigned long *allocated_ipv4s
; /* A bitmap of allocated IPv4s */
387 bool ipv6_prefix_set
;
388 struct in6_addr ipv6_prefix
;
391 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
392 * sb->external_ids:logical-switch. */
393 struct ovn_datapath
{
394 struct hmap_node key_node
; /* Index on 'key'. */
395 struct uuid key
; /* (nbs/nbr)->header_.uuid. */
397 const struct nbrec_logical_switch
*nbs
; /* May be NULL. */
398 const struct nbrec_logical_router
*nbr
; /* May be NULL. */
399 const struct sbrec_datapath_binding
*sb
; /* May be NULL. */
401 struct ovs_list list
; /* In list of similar records. */
403 /* Logical switch data. */
404 struct ovn_port
**router_ports
;
405 size_t n_router_ports
;
407 struct hmap port_tnlids
;
408 uint32_t port_key_hint
;
413 struct ipam_info
*ipam_info
;
415 /* OVN northd only needs to know about the logical router gateway port for
416 * NAT on a distributed router. This "distributed gateway port" is
417 * populated only when there is a "redirect-chassis" specified for one of
418 * the ports on the logical router. Otherwise this will be NULL. */
419 struct ovn_port
*l3dgw_port
;
420 /* The "derived" OVN port representing the instance of l3dgw_port on
421 * the "redirect-chassis". */
422 struct ovn_port
*l3redirect_port
;
423 struct ovn_port
*localnet_port
;
427 struct hmap_node hmap_node
;
428 struct eth_addr mac_addr
; /* Allocated MAC address. */
432 cleanup_macam(struct hmap
*macam
)
434 struct macam_node
*node
;
435 HMAP_FOR_EACH_POP (node
, hmap_node
, macam
) {
440 static struct ovn_datapath
*
441 ovn_datapath_create(struct hmap
*datapaths
, const struct uuid
*key
,
442 const struct nbrec_logical_switch
*nbs
,
443 const struct nbrec_logical_router
*nbr
,
444 const struct sbrec_datapath_binding
*sb
)
446 struct ovn_datapath
*od
= xzalloc(sizeof *od
);
451 hmap_init(&od
->port_tnlids
);
452 od
->port_key_hint
= 0;
453 hmap_insert(datapaths
, &od
->key_node
, uuid_hash(&od
->key
));
458 ovn_datapath_destroy(struct hmap
*datapaths
, struct ovn_datapath
*od
)
461 /* Don't remove od->list. It is used within build_datapaths() as a
462 * private list and once we've exited that function it is not safe to
464 hmap_remove(datapaths
, &od
->key_node
);
465 destroy_tnlids(&od
->port_tnlids
);
467 bitmap_free(od
->ipam_info
->allocated_ipv4s
);
470 free(od
->router_ports
);
475 /* Returns 'od''s datapath type. */
476 static enum ovn_datapath_type
477 ovn_datapath_get_type(const struct ovn_datapath
*od
)
479 return od
->nbs
? DP_SWITCH
: DP_ROUTER
;
482 static struct ovn_datapath
*
483 ovn_datapath_find(struct hmap
*datapaths
, const struct uuid
*uuid
)
485 struct ovn_datapath
*od
;
487 HMAP_FOR_EACH_WITH_HASH (od
, key_node
, uuid_hash(uuid
), datapaths
) {
488 if (uuid_equals(uuid
, &od
->key
)) {
495 static struct ovn_datapath
*
496 ovn_datapath_from_sbrec(struct hmap
*datapaths
,
497 const struct sbrec_datapath_binding
*sb
)
501 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
502 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
505 return ovn_datapath_find(datapaths
, &key
);
509 lrouter_is_enabled(const struct nbrec_logical_router
*lrouter
)
511 return !lrouter
->enabled
|| *lrouter
->enabled
;
515 init_ipam_info_for_datapath(struct ovn_datapath
*od
)
521 const char *subnet_str
= smap_get(&od
->nbs
->other_config
, "subnet");
522 const char *ipv6_prefix
= smap_get(&od
->nbs
->other_config
, "ipv6_prefix");
525 od
->ipam_info
= xzalloc(sizeof *od
->ipam_info
);
526 od
->ipam_info
->ipv6_prefix_set
= ipv6_parse(
527 ipv6_prefix
, &od
->ipam_info
->ipv6_prefix
);
534 ovs_be32 subnet
, mask
;
535 char *error
= ip_parse_masked(subnet_str
, &subnet
, &mask
);
536 if (error
|| mask
== OVS_BE32_MAX
|| !ip_is_cidr(mask
)) {
537 static struct vlog_rate_limit rl
538 = VLOG_RATE_LIMIT_INIT(5, 1);
539 VLOG_WARN_RL(&rl
, "bad 'subnet' %s", subnet_str
);
544 if (!od
->ipam_info
) {
545 od
->ipam_info
= xzalloc(sizeof *od
->ipam_info
);
547 od
->ipam_info
->start_ipv4
= ntohl(subnet
) + 1;
548 od
->ipam_info
->total_ipv4s
= ~ntohl(mask
);
549 od
->ipam_info
->allocated_ipv4s
=
550 bitmap_allocate(od
->ipam_info
->total_ipv4s
);
552 /* Mark first IP as taken */
553 bitmap_set1(od
->ipam_info
->allocated_ipv4s
, 0);
555 /* Check if there are any reserver IPs (list) to be excluded from IPAM */
556 const char *exclude_ip_list
= smap_get(&od
->nbs
->other_config
,
558 if (!exclude_ip_list
) {
563 lexer_init(&lexer
, exclude_ip_list
);
564 /* exclude_ip_list could be in the format -
565 * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
568 while (lexer
.token
.type
!= LEX_T_END
) {
569 if (lexer
.token
.type
!= LEX_T_INTEGER
) {
570 lexer_syntax_error(&lexer
, "expecting address");
573 uint32_t start
= ntohl(lexer
.token
.value
.ipv4
);
576 uint32_t end
= start
+ 1;
577 if (lexer_match(&lexer
, LEX_T_ELLIPSIS
)) {
578 if (lexer
.token
.type
!= LEX_T_INTEGER
) {
579 lexer_syntax_error(&lexer
, "expecting address range");
582 end
= ntohl(lexer
.token
.value
.ipv4
) + 1;
586 /* Clamp start...end to fit the subnet. */
587 start
= MAX(od
->ipam_info
->start_ipv4
, start
);
588 end
= MIN(od
->ipam_info
->start_ipv4
+ od
->ipam_info
->total_ipv4s
, end
);
590 bitmap_set_multiple(od
->ipam_info
->allocated_ipv4s
,
591 start
- od
->ipam_info
->start_ipv4
,
594 lexer_error(&lexer
, "excluded addresses not in subnet");
598 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
599 VLOG_WARN_RL(&rl
, "logical switch "UUID_FMT
": bad exclude_ips (%s)",
600 UUID_ARGS(&od
->key
), lexer
.error
);
602 lexer_destroy(&lexer
);
606 ovn_datapath_update_external_ids(struct ovn_datapath
*od
)
608 /* Get the logical-switch or logical-router UUID to set in
610 char uuid_s
[UUID_LEN
+ 1];
611 sprintf(uuid_s
, UUID_FMT
, UUID_ARGS(&od
->key
));
612 const char *key
= od
->nbs
? "logical-switch" : "logical-router";
614 /* Get names to set in external-ids. */
615 const char *name
= od
->nbs
? od
->nbs
->name
: od
->nbr
->name
;
616 const char *name2
= (od
->nbs
617 ? smap_get(&od
->nbs
->external_ids
,
618 "neutron:network_name")
619 : smap_get(&od
->nbr
->external_ids
,
620 "neutron:router_name"));
622 /* Set external-ids. */
623 struct smap ids
= SMAP_INITIALIZER(&ids
);
624 smap_add(&ids
, key
, uuid_s
);
625 smap_add(&ids
, "name", name
);
626 if (name2
&& name2
[0]) {
627 smap_add(&ids
, "name2", name2
);
629 sbrec_datapath_binding_set_external_ids(od
->sb
, &ids
);
634 join_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
,
635 struct ovs_list
*sb_only
, struct ovs_list
*nb_only
,
636 struct ovs_list
*both
)
638 hmap_init(datapaths
);
639 ovs_list_init(sb_only
);
640 ovs_list_init(nb_only
);
643 const struct sbrec_datapath_binding
*sb
, *sb_next
;
644 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb
, sb_next
, ctx
->ovnsb_idl
) {
646 if (!smap_get_uuid(&sb
->external_ids
, "logical-switch", &key
) &&
647 !smap_get_uuid(&sb
->external_ids
, "logical-router", &key
)) {
648 ovsdb_idl_txn_add_comment(
650 "deleting Datapath_Binding "UUID_FMT
" that lacks "
651 "external-ids:logical-switch and "
652 "external-ids:logical-router",
653 UUID_ARGS(&sb
->header_
.uuid
));
654 sbrec_datapath_binding_delete(sb
);
658 if (ovn_datapath_find(datapaths
, &key
)) {
659 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
661 &rl
, "deleting Datapath_Binding "UUID_FMT
" with "
662 "duplicate external-ids:logical-switch/router "UUID_FMT
,
663 UUID_ARGS(&sb
->header_
.uuid
), UUID_ARGS(&key
));
664 sbrec_datapath_binding_delete(sb
);
668 struct ovn_datapath
*od
= ovn_datapath_create(datapaths
, &key
,
670 ovs_list_push_back(sb_only
, &od
->list
);
673 const struct nbrec_logical_switch
*nbs
;
674 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs
, ctx
->ovnnb_idl
) {
675 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
679 ovs_list_remove(&od
->list
);
680 ovs_list_push_back(both
, &od
->list
);
681 ovn_datapath_update_external_ids(od
);
683 od
= ovn_datapath_create(datapaths
, &nbs
->header_
.uuid
,
685 ovs_list_push_back(nb_only
, &od
->list
);
688 init_ipam_info_for_datapath(od
);
691 const struct nbrec_logical_router
*nbr
;
692 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr
, ctx
->ovnnb_idl
) {
693 if (!lrouter_is_enabled(nbr
)) {
697 struct ovn_datapath
*od
= ovn_datapath_find(datapaths
,
702 ovs_list_remove(&od
->list
);
703 ovs_list_push_back(both
, &od
->list
);
704 ovn_datapath_update_external_ids(od
);
707 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
709 "duplicate UUID "UUID_FMT
" in OVN_Northbound",
710 UUID_ARGS(&nbr
->header_
.uuid
));
714 od
= ovn_datapath_create(datapaths
, &nbr
->header_
.uuid
,
716 ovs_list_push_back(nb_only
, &od
->list
);
/* Allocates an unused datapath tunnel key in [1, 2^24 - 1], remembering where
 * the search left off across calls in a function-local static hint. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
728 /* Updates the southbound Datapath_Binding table so that it contains the
729 * logical switches and routers specified by the northbound database.
731 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
732 * switch and router. */
734 build_datapaths(struct northd_context
*ctx
, struct hmap
*datapaths
)
736 struct ovs_list sb_only
, nb_only
, both
;
738 join_datapaths(ctx
, datapaths
, &sb_only
, &nb_only
, &both
);
740 if (!ovs_list_is_empty(&nb_only
)) {
741 /* First index the in-use datapath tunnel IDs. */
742 struct hmap dp_tnlids
= HMAP_INITIALIZER(&dp_tnlids
);
743 struct ovn_datapath
*od
;
744 LIST_FOR_EACH (od
, list
, &both
) {
745 add_tnlid(&dp_tnlids
, od
->sb
->tunnel_key
);
748 /* Add southbound record for each unmatched northbound record. */
749 LIST_FOR_EACH (od
, list
, &nb_only
) {
750 uint16_t tunnel_key
= ovn_datapath_allocate_key(&dp_tnlids
);
755 od
->sb
= sbrec_datapath_binding_insert(ctx
->ovnsb_txn
);
756 ovn_datapath_update_external_ids(od
);
757 sbrec_datapath_binding_set_tunnel_key(od
->sb
, tunnel_key
);
759 destroy_tnlids(&dp_tnlids
);
762 /* Delete southbound records without northbound matches. */
763 struct ovn_datapath
*od
, *next
;
764 LIST_FOR_EACH_SAFE (od
, next
, list
, &sb_only
) {
765 ovs_list_remove(&od
->list
);
766 sbrec_datapath_binding_delete(od
->sb
);
767 ovn_datapath_destroy(datapaths
, od
);
772 struct hmap_node key_node
; /* Index on 'key'. */
773 char *key
; /* nbs->name, nbr->name, sb->logical_port. */
774 char *json_key
; /* 'key', quoted for use in JSON. */
776 const struct sbrec_port_binding
*sb
; /* May be NULL. */
778 /* Logical switch port data. */
779 const struct nbrec_logical_switch_port
*nbsp
; /* May be NULL. */
781 struct lport_addresses
*lsp_addrs
; /* Logical switch port addresses. */
782 unsigned int n_lsp_addrs
;
784 struct lport_addresses
*ps_addrs
; /* Port security addresses. */
785 unsigned int n_ps_addrs
;
787 /* Logical router port data. */
788 const struct nbrec_logical_router_port
*nbrp
; /* May be NULL. */
790 struct lport_addresses lrp_networks
;
792 bool derived
; /* Indicates whether this is an additional port
793 * derived from nbsp or nbrp. */
797 * - A switch port S of type "router" has a router port R as a peer,
798 * and R in turn has S has its peer.
800 * - Two connected logical router ports have each other as peer. */
801 struct ovn_port
*peer
;
803 struct ovn_datapath
*od
;
805 struct ovs_list list
; /* In list of similar records. */
808 static struct ovn_port
*
809 ovn_port_create(struct hmap
*ports
, const char *key
,
810 const struct nbrec_logical_switch_port
*nbsp
,
811 const struct nbrec_logical_router_port
*nbrp
,
812 const struct sbrec_port_binding
*sb
)
814 struct ovn_port
*op
= xzalloc(sizeof *op
);
816 struct ds json_key
= DS_EMPTY_INITIALIZER
;
817 json_string_escape(key
, &json_key
);
818 op
->json_key
= ds_steal_cstr(&json_key
);
820 op
->key
= xstrdup(key
);
825 hmap_insert(ports
, &op
->key_node
, hash_string(op
->key
, 0));
830 ovn_port_destroy(struct hmap
*ports
, struct ovn_port
*port
)
833 /* Don't remove port->list. It is used within build_ports() as a
834 * private list and once we've exited that function it is not safe to
836 hmap_remove(ports
, &port
->key_node
);
838 for (int i
= 0; i
< port
->n_lsp_addrs
; i
++) {
839 destroy_lport_addresses(&port
->lsp_addrs
[i
]);
841 free(port
->lsp_addrs
);
843 for (int i
= 0; i
< port
->n_ps_addrs
; i
++) {
844 destroy_lport_addresses(&port
->ps_addrs
[i
]);
846 free(port
->ps_addrs
);
848 destroy_lport_addresses(&port
->lrp_networks
);
849 free(port
->json_key
);
855 static struct ovn_port
*
856 ovn_port_find(struct hmap
*ports
, const char *name
)
860 HMAP_FOR_EACH_WITH_HASH (op
, key_node
, hash_string(name
, 0), ports
) {
861 if (!strcmp(op
->key
, name
)) {
869 ovn_port_allocate_key(struct ovn_datapath
*od
)
871 return allocate_tnlid(&od
->port_tnlids
, "port",
872 (1u << 15) - 1, &od
->port_key_hint
);
/* Returns the name of the chassis-redirect ("cr-") port derived from
 * 'port_name'.  Caller owns (and must free) the returned string. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
882 ipam_is_duplicate_mac(struct eth_addr
*ea
, uint64_t mac64
, bool warn
)
884 struct macam_node
*macam_node
;
885 HMAP_FOR_EACH_WITH_HASH (macam_node
, hmap_node
, hash_uint64(mac64
),
887 if (eth_addr_equals(*ea
, macam_node
->mac_addr
)) {
889 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
890 VLOG_WARN_RL(&rl
, "Duplicate MAC set: "ETH_ADDR_FMT
,
891 ETH_ADDR_ARGS(macam_node
->mac_addr
));
900 ipam_insert_mac(struct eth_addr
*ea
, bool check
)
906 uint64_t mac64
= eth_addr_to_uint64(*ea
);
907 /* If the new MAC was not assigned by this address management system or
908 * check is true and the new MAC is a duplicate, do not insert it into the
910 if (((mac64
^ MAC_ADDR_PREFIX
) >> 24)
911 || (check
&& ipam_is_duplicate_mac(ea
, mac64
, true))) {
915 struct macam_node
*new_macam_node
= xmalloc(sizeof *new_macam_node
);
916 new_macam_node
->mac_addr
= *ea
;
917 hmap_insert(&macam
, &new_macam_node
->hmap_node
, hash_uint64(mac64
));
921 ipam_insert_ip(struct ovn_datapath
*od
, uint32_t ip
)
923 if (!od
|| !od
->ipam_info
|| !od
->ipam_info
->allocated_ipv4s
) {
927 if (ip
>= od
->ipam_info
->start_ipv4
&&
928 ip
< (od
->ipam_info
->start_ipv4
+ od
->ipam_info
->total_ipv4s
)) {
929 bitmap_set1(od
->ipam_info
->allocated_ipv4s
,
930 ip
- od
->ipam_info
->start_ipv4
);
935 ipam_insert_lsp_addresses(struct ovn_datapath
*od
, struct ovn_port
*op
,
938 if (!od
|| !op
|| !address
|| !strcmp(address
, "unknown")
939 || !strcmp(address
, "router") || is_dynamic_lsp_address(address
)) {
943 struct lport_addresses laddrs
;
944 if (!extract_lsp_addresses(address
, &laddrs
)) {
945 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
946 VLOG_WARN_RL(&rl
, "Extract addresses failed.");
949 ipam_insert_mac(&laddrs
.ea
, true);
951 /* IP is only added to IPAM if the switch's subnet option
952 * is set, whereas MAC is always added to MACAM. */
953 if (!od
->ipam_info
|| !od
->ipam_info
->allocated_ipv4s
) {
954 destroy_lport_addresses(&laddrs
);
958 for (size_t j
= 0; j
< laddrs
.n_ipv4_addrs
; j
++) {
959 uint32_t ip
= ntohl(laddrs
.ipv4_addrs
[j
].addr
);
960 ipam_insert_ip(od
, ip
);
963 destroy_lport_addresses(&laddrs
);
967 ipam_add_port_addresses(struct ovn_datapath
*od
, struct ovn_port
*op
)
974 /* Add all the port's addresses to address data structures. */
975 for (size_t i
= 0; i
< op
->nbsp
->n_addresses
; i
++) {
976 ipam_insert_lsp_addresses(od
, op
, op
->nbsp
->addresses
[i
]);
978 if (op
->nbsp
->dynamic_addresses
) {
979 ipam_insert_lsp_addresses(od
, op
, op
->nbsp
->dynamic_addresses
);
981 } else if (op
->nbrp
) {
982 struct lport_addresses lrp_networks
;
983 if (!extract_lrp_networks(op
->nbrp
, &lrp_networks
)) {
984 static struct vlog_rate_limit rl
985 = VLOG_RATE_LIMIT_INIT(1, 1);
986 VLOG_WARN_RL(&rl
, "Extract addresses failed.");
989 ipam_insert_mac(&lrp_networks
.ea
, true);
991 if (!op
->peer
|| !op
->peer
->nbsp
|| !op
->peer
->od
|| !op
->peer
->od
->nbs
992 || !smap_get(&op
->peer
->od
->nbs
->other_config
, "subnet")) {
993 destroy_lport_addresses(&lrp_networks
);
997 for (size_t i
= 0; i
< lrp_networks
.n_ipv4_addrs
; i
++) {
998 uint32_t ip
= ntohl(lrp_networks
.ipv4_addrs
[i
].addr
);
999 ipam_insert_ip(op
->peer
->od
, ip
);
1002 destroy_lport_addresses(&lrp_networks
);
1007 ipam_get_unused_mac(void)
1009 /* Stores the suffix of the most recently ipam-allocated MAC address. */
1010 static uint32_t last_mac
;
1013 struct eth_addr mac
;
1014 uint32_t mac_addr_suffix
, i
;
1015 for (i
= 0; i
< MAC_ADDR_SPACE
- 1; i
++) {
1016 /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */
1017 mac_addr_suffix
= ((last_mac
+ i
) % (MAC_ADDR_SPACE
- 1)) + 1;
1018 mac64
= MAC_ADDR_PREFIX
| mac_addr_suffix
;
1019 eth_addr_from_uint64(mac64
, &mac
);
1020 if (!ipam_is_duplicate_mac(&mac
, mac64
, false)) {
1021 last_mac
= mac_addr_suffix
;
1026 if (i
== MAC_ADDR_SPACE
) {
1027 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
1028 VLOG_WARN_RL(&rl
, "MAC address space exhausted.");
1036 ipam_get_unused_ip(struct ovn_datapath
*od
)
1038 if (!od
|| !od
->ipam_info
|| !od
->ipam_info
->allocated_ipv4s
) {
1042 size_t new_ip_index
= bitmap_scan(od
->ipam_info
->allocated_ipv4s
, 0, 0,
1043 od
->ipam_info
->total_ipv4s
- 1);
1044 if (new_ip_index
== od
->ipam_info
->total_ipv4s
- 1) {
1045 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
1046 VLOG_WARN_RL( &rl
, "Subnet address space has been exhausted.");
1050 return od
->ipam_info
->start_ipv4
+ new_ip_index
;
1054 ipam_allocate_addresses(struct ovn_datapath
*od
, struct ovn_port
*op
,
1055 const char *addrspec
)
1057 if (!op
->nbsp
|| !od
->ipam_info
) {
1061 /* Get or generate MAC address. */
1062 struct eth_addr mac
;
1065 if (ovs_scan(addrspec
, ETH_ADDR_SCAN_FMT
" dynamic%n",
1066 ETH_ADDR_SCAN_ARGS(mac
), &n
)
1067 && addrspec
[n
] == '\0') {
1068 dynamic_mac
= false;
1070 uint64_t mac64
= ipam_get_unused_mac();
1074 eth_addr_from_uint64(mac64
, &mac
);
1078 /* Generate IPv4 address, if desirable. */
1079 bool dynamic_ip4
= od
->ipam_info
->allocated_ipv4s
!= NULL
;
1080 uint32_t ip4
= dynamic_ip4
? ipam_get_unused_ip(od
) : 0;
1082 /* Generate IPv6 address, if desirable. */
1083 bool dynamic_ip6
= od
->ipam_info
->ipv6_prefix_set
;
1084 struct in6_addr ip6
;
1086 in6_generate_eui64(mac
, &od
->ipam_info
->ipv6_prefix
, &ip6
);
1089 /* If we didn't generate anything, bail out. */
1090 if (!dynamic_ip4
&& !dynamic_ip6
) {
1094 /* Save the dynamic addresses. */
1095 struct ds new_addr
= DS_EMPTY_INITIALIZER
;
1096 ds_put_format(&new_addr
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(mac
));
1097 if (dynamic_ip4
&& ip4
) {
1098 ipam_insert_ip(od
, ip4
);
1099 ds_put_format(&new_addr
, " "IP_FMT
, IP_ARGS(htonl(ip4
)));
1102 char ip6_s
[INET6_ADDRSTRLEN
+ 1];
1103 ipv6_string_mapped(ip6_s
, &ip6
);
1104 ds_put_format(&new_addr
, " %s", ip6_s
);
1106 ipam_insert_mac(&mac
, !dynamic_mac
);
1107 nbrec_logical_switch_port_set_dynamic_addresses(op
->nbsp
,
1108 ds_cstr(&new_addr
));
1109 ds_destroy(&new_addr
);
1114 build_ipam(struct hmap
*datapaths
, struct hmap
*ports
)
1116 /* IPAM generally stands for IP address management. In non-virtualized
1117 * world, MAC addresses come with the hardware. But, with virtualized
1118 * workloads, they need to be assigned and managed. This function
1119 * does both IP address management (ipam) and MAC address management
1122 /* If the switch's other_config:subnet is set, allocate new addresses for
1123 * ports that have the "dynamic" keyword in their addresses column. */
1124 struct ovn_datapath
*od
;
1125 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1126 if (!od
->nbs
|| !od
->ipam_info
) {
1130 struct ovn_port
*op
;
1131 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
1132 const struct nbrec_logical_switch_port
*nbsp
=
1139 op
= ovn_port_find(ports
, nbsp
->name
);
1140 if (!op
|| (op
->nbsp
&& op
->peer
)) {
1141 /* Do not allocate addresses for logical switch ports that
1146 for (size_t j
= 0; j
< nbsp
->n_addresses
; j
++) {
1147 if (is_dynamic_lsp_address(nbsp
->addresses
[j
])
1148 && !nbsp
->dynamic_addresses
) {
1149 if (!ipam_allocate_addresses(od
, op
, nbsp
->addresses
[j
])
1150 || !extract_lsp_addresses(nbsp
->dynamic_addresses
,
1151 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1152 static struct vlog_rate_limit rl
1153 = VLOG_RATE_LIMIT_INIT(1, 1);
1154 VLOG_INFO_RL(&rl
, "Failed to allocate address.");
1162 if (!nbsp
->n_addresses
&& nbsp
->dynamic_addresses
) {
1163 nbrec_logical_switch_port_set_dynamic_addresses(op
->nbsp
,
1170 /* Tag allocation for nested containers.
1172 * For a logical switch port with 'parent_name' and a request to allocate tags,
1173 * keeps a track of all allocated tags. */
1174 struct tag_alloc_node
{
1175 struct hmap_node hmap_node
;
1177 unsigned long *allocated_tags
; /* A bitmap to track allocated tags. */
1181 tag_alloc_destroy(struct hmap
*tag_alloc_table
)
1183 struct tag_alloc_node
*node
;
1184 HMAP_FOR_EACH_POP (node
, hmap_node
, tag_alloc_table
) {
1185 bitmap_free(node
->allocated_tags
);
1186 free(node
->parent_name
);
1189 hmap_destroy(tag_alloc_table
);
1192 static struct tag_alloc_node
*
1193 tag_alloc_get_node(struct hmap
*tag_alloc_table
, const char *parent_name
)
1195 /* If a node for the 'parent_name' exists, return it. */
1196 struct tag_alloc_node
*tag_alloc_node
;
1197 HMAP_FOR_EACH_WITH_HASH (tag_alloc_node
, hmap_node
,
1198 hash_string(parent_name
, 0),
1200 if (!strcmp(tag_alloc_node
->parent_name
, parent_name
)) {
1201 return tag_alloc_node
;
1205 /* Create a new node. */
1206 tag_alloc_node
= xmalloc(sizeof *tag_alloc_node
);
1207 tag_alloc_node
->parent_name
= xstrdup(parent_name
);
1208 tag_alloc_node
->allocated_tags
= bitmap_allocate(MAX_OVN_TAGS
);
1209 /* Tag 0 is invalid for nested containers. */
1210 bitmap_set1(tag_alloc_node
->allocated_tags
, 0);
1211 hmap_insert(tag_alloc_table
, &tag_alloc_node
->hmap_node
,
1212 hash_string(parent_name
, 0));
1214 return tag_alloc_node
;
1218 tag_alloc_add_existing_tags(struct hmap
*tag_alloc_table
,
1219 const struct nbrec_logical_switch_port
*nbsp
)
1221 /* Add the tags of already existing nested containers. If there is no
1222 * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
1223 if (!nbsp
->parent_name
|| !nbsp
->parent_name
[0] || !nbsp
->tag
) {
1227 struct tag_alloc_node
*tag_alloc_node
;
1228 tag_alloc_node
= tag_alloc_get_node(tag_alloc_table
, nbsp
->parent_name
);
1229 bitmap_set1(tag_alloc_node
->allocated_tags
, *nbsp
->tag
);
1233 tag_alloc_create_new_tag(struct hmap
*tag_alloc_table
,
1234 const struct nbrec_logical_switch_port
*nbsp
)
1236 if (!nbsp
->tag_request
) {
1240 if (nbsp
->parent_name
&& nbsp
->parent_name
[0]
1241 && *nbsp
->tag_request
== 0) {
1242 /* For nested containers that need allocation, do the allocation. */
1245 /* This has already been allocated. */
1249 struct tag_alloc_node
*tag_alloc_node
;
1251 tag_alloc_node
= tag_alloc_get_node(tag_alloc_table
,
1253 tag
= bitmap_scan(tag_alloc_node
->allocated_tags
, 0, 1, MAX_OVN_TAGS
);
1254 if (tag
== MAX_OVN_TAGS
) {
1255 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1256 VLOG_ERR_RL(&rl
, "out of vlans for logical switch ports with "
1257 "parent %s", nbsp
->parent_name
);
1260 bitmap_set1(tag_alloc_node
->allocated_tags
, tag
);
1261 nbrec_logical_switch_port_set_tag(nbsp
, &tag
, 1);
1262 } else if (*nbsp
->tag_request
!= 0) {
1263 /* For everything else, copy the contents of 'tag_request' to 'tag'. */
1264 nbrec_logical_switch_port_set_tag(nbsp
, nbsp
->tag_request
, 1);
1270 * This function checks if the MAC in "address" parameter (if present) is
1271 * different from the one stored in Logical_Switch_Port.dynamic_addresses
1275 check_and_update_mac_in_dynamic_addresses(
1276 const char *address
,
1277 const struct nbrec_logical_switch_port
*nbsp
)
1279 if (!nbsp
->dynamic_addresses
) {
1284 if (!ovs_scan_len(address
, &buf_index
,
1285 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(ea
))) {
1289 struct eth_addr present_ea
;
1291 if (ovs_scan_len(nbsp
->dynamic_addresses
, &buf_index
,
1292 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(present_ea
))
1293 && !eth_addr_equals(ea
, present_ea
)) {
1294 /* MAC address has changed. Update it */
1295 char *new_addr
= xasprintf(
1296 ETH_ADDR_FMT
"%s", ETH_ADDR_ARGS(ea
),
1297  
->dynamic_addresses
[buf_index
]);
1298 nbrec_logical_switch_port_set_dynamic_addresses(
1305 join_logical_ports(struct northd_context
*ctx
,
1306 struct hmap
*datapaths
, struct hmap
*ports
,
1307 struct hmap
*chassis_qdisc_queues
,
1308 struct hmap
*tag_alloc_table
, struct ovs_list
*sb_only
,
1309 struct ovs_list
*nb_only
, struct ovs_list
*both
)
1312 ovs_list_init(sb_only
);
1313 ovs_list_init(nb_only
);
1314 ovs_list_init(both
);
1316 const struct sbrec_port_binding
*sb
;
1317 SBREC_PORT_BINDING_FOR_EACH (sb
, ctx
->ovnsb_idl
) {
1318 struct ovn_port
*op
= ovn_port_create(ports
, sb
->logical_port
,
1320 ovs_list_push_back(sb_only
, &op
->list
);
1323 struct ovn_datapath
*od
;
1324 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
1326 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
1327 const struct nbrec_logical_switch_port
*nbsp
1328 = od
->nbs
->ports
[i
];
1329 struct ovn_port
*op
= ovn_port_find(ports
, nbsp
->name
);
1331 if (op
->nbsp
|| op
->nbrp
) {
1332 static struct vlog_rate_limit rl
1333 = VLOG_RATE_LIMIT_INIT(5, 1);
1334 VLOG_WARN_RL(&rl
, "duplicate logical port %s",
1339 ovs_list_remove(&op
->list
);
1341 uint32_t queue_id
= smap_get_int(&op
->sb
->options
,
1342 "qdisc_queue_id", 0);
1343 if (queue_id
&& op
->sb
->chassis
) {
1345 chassis_qdisc_queues
, &op
->sb
->chassis
->header_
.uuid
,
1349 ovs_list_push_back(both
, &op
->list
);
1351 /* This port exists due to a SB binding, but should
1352 * not have been initialized fully. */
1353 ovs_assert(!op
->n_lsp_addrs
&& !op
->n_ps_addrs
);
1355 op
= ovn_port_create(ports
, nbsp
->name
, nbsp
, NULL
, NULL
);
1356 ovs_list_push_back(nb_only
, &op
->list
);
1359 if (!strcmp(nbsp
->type
, "localnet")) {
1360 od
->localnet_port
= op
;
1364 = xmalloc(sizeof *op
->lsp_addrs
* nbsp
->n_addresses
);
1365 for (size_t j
= 0; j
< nbsp
->n_addresses
; j
++) {
1366 if (!strcmp(nbsp
->addresses
[j
], "unknown")
1367 || !strcmp(nbsp
->addresses
[j
], "router")) {
1370 if (is_dynamic_lsp_address(nbsp
->addresses
[j
])) {
1371 if (nbsp
->dynamic_addresses
) {
1372 check_and_update_mac_in_dynamic_addresses(
1373 nbsp
->addresses
[j
], nbsp
);
1374 if (!extract_lsp_addresses(nbsp
->dynamic_addresses
,
1375 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1376 static struct vlog_rate_limit rl
1377 = VLOG_RATE_LIMIT_INIT(1, 1);
1378 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in "
1379 "logical switch port "
1380 "dynamic_addresses. No "
1381 "MAC address found",
1382 op
->nbsp
->dynamic_addresses
);
1388 } else if (!extract_lsp_addresses(nbsp
->addresses
[j
],
1389 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1390 static struct vlog_rate_limit rl
1391 = VLOG_RATE_LIMIT_INIT(1, 1);
1392 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in logical "
1393 "switch port addresses. No MAC "
1395 op
->nbsp
->addresses
[j
]);
1402 = xmalloc(sizeof *op
->ps_addrs
* nbsp
->n_port_security
);
1403 for (size_t j
= 0; j
< nbsp
->n_port_security
; j
++) {
1404 if (!extract_lsp_addresses(nbsp
->port_security
[j
],
1405 &op
->ps_addrs
[op
->n_ps_addrs
])) {
1406 static struct vlog_rate_limit rl
1407 = VLOG_RATE_LIMIT_INIT(1, 1);
1408 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in port "
1409 "security. No MAC address found",
1410 op
->nbsp
->port_security
[j
]);
1417 ipam_add_port_addresses(od
, op
);
1418 tag_alloc_add_existing_tags(tag_alloc_table
, nbsp
);
1421 for (size_t i
= 0; i
< od
->nbr
->n_ports
; i
++) {
1422 const struct nbrec_logical_router_port
*nbrp
1423 = od
->nbr
->ports
[i
];
1425 struct lport_addresses lrp_networks
;
1426 if (!extract_lrp_networks(nbrp
, &lrp_networks
)) {
1427 static struct vlog_rate_limit rl
1428 = VLOG_RATE_LIMIT_INIT(5, 1);
1429 VLOG_WARN_RL(&rl
, "bad 'mac' %s", nbrp
->mac
);
1433 if (!lrp_networks
.n_ipv4_addrs
&& !lrp_networks
.n_ipv6_addrs
) {
1437 struct ovn_port
*op
= ovn_port_find(ports
, nbrp
->name
);
1439 if (op
->nbsp
|| op
->nbrp
) {
1440 static struct vlog_rate_limit rl
1441 = VLOG_RATE_LIMIT_INIT(5, 1);
1442 VLOG_WARN_RL(&rl
, "duplicate logical router port %s",
1447 ovs_list_remove(&op
->list
);
1448 ovs_list_push_back(both
, &op
->list
);
1450 /* This port exists but should not have been
1451 * initialized fully. */
1452 ovs_assert(!op
->lrp_networks
.n_ipv4_addrs
1453 && !op
->lrp_networks
.n_ipv6_addrs
);
1455 op
= ovn_port_create(ports
, nbrp
->name
, NULL
, nbrp
, NULL
);
1456 ovs_list_push_back(nb_only
, &op
->list
);
1459 op
->lrp_networks
= lrp_networks
;
1461 ipam_add_port_addresses(op
->od
, op
);
1463 const char *redirect_chassis
= smap_get(&op
->nbrp
->options
,
1464 "redirect-chassis");
1465 if (redirect_chassis
|| op
->nbrp
->n_gateway_chassis
) {
1466 /* Additional "derived" ovn_port crp represents the
1467 * instance of op on the "redirect-chassis". */
1468 const char *gw_chassis
= smap_get(&op
->od
->nbr
->options
,
1471 static struct vlog_rate_limit rl
1472 = VLOG_RATE_LIMIT_INIT(1, 1);
1473 VLOG_WARN_RL(&rl
, "Bad configuration: "
1474 "redirect-chassis configured on port %s "
1475 "on L3 gateway router", nbrp
->name
);
1478 if (od
->l3dgw_port
|| od
->l3redirect_port
) {
1479 static struct vlog_rate_limit rl
1480 = VLOG_RATE_LIMIT_INIT(1, 1);
1481 VLOG_WARN_RL(&rl
, "Bad configuration: multiple ports "
1482 "with redirect-chassis on same logical "
1483 "router %s", od
->nbr
->name
);
1487 char *redirect_name
= chassis_redirect_name(nbrp
->name
);
1488 struct ovn_port
*crp
= ovn_port_find(ports
, redirect_name
);
1490 crp
->derived
= true;
1492 ovs_list_remove(&crp
->list
);
1493 ovs_list_push_back(both
, &crp
->list
);
1495 crp
= ovn_port_create(ports
, redirect_name
,
1497 crp
->derived
= true;
1498 ovs_list_push_back(nb_only
, &crp
->list
);
1501 free(redirect_name
);
1503 /* Set l3dgw_port and l3redirect_port in od, for later
1504 * use during flow creation. */
1505 od
->l3dgw_port
= op
;
1506 od
->l3redirect_port
= crp
;
1512 /* Connect logical router ports, and logical switch ports of type "router",
1513 * to their peers. */
1514 struct ovn_port
*op
;
1515 HMAP_FOR_EACH (op
, key_node
, ports
) {
1516 if (op
->nbsp
&& !strcmp(op
->nbsp
->type
, "router") && !op
->derived
) {
1517 const char *peer_name
= smap_get(&op
->nbsp
->options
, "router-port");
1522 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
1523 if (!peer
|| !peer
->nbrp
) {
1529 op
->od
->router_ports
= xrealloc(
1530 op
->od
->router_ports
,
1531 sizeof *op
->od
->router_ports
* (op
->od
->n_router_ports
+ 1));
1532 op
->od
->router_ports
[op
->od
->n_router_ports
++] = op
;
1534 /* Fill op->lsp_addrs for op->nbsp->addresses[] with
1535 * contents "router", which was skipped in the loop above. */
1536 for (size_t j
= 0; j
< op
->nbsp
->n_addresses
; j
++) {
1537 if (!strcmp(op
->nbsp
->addresses
[j
], "router")) {
1538 if (extract_lrp_networks(peer
->nbrp
,
1539 &op
->lsp_addrs
[op
->n_lsp_addrs
])) {
1545 } else if (op
->nbrp
&& op
->nbrp
->peer
&& !op
->derived
) {
1546 struct ovn_port
*peer
= ovn_port_find(ports
, op
->nbrp
->peer
);
1550 } else if (peer
->nbsp
) {
1551 /* An ovn_port for a switch port of type "router" does have
1552 * a router port as its peer (see the case above for
1553 * "router" ports), but this is set via options:router-port
1554 * in Logical_Switch_Port and does not involve the
1555 * Logical_Router_Port's 'peer' column. */
1556 static struct vlog_rate_limit rl
=
1557 VLOG_RATE_LIMIT_INIT(5, 1);
1558 VLOG_WARN_RL(&rl
, "Bad configuration: The peer of router "
1559 "port %s is a switch port", op
->key
);
1567 ip_address_and_port_from_lb_key(const char *key
, char **ip_address
,
1568 uint16_t *port
, int *addr_family
);
1571 get_router_load_balancer_ips(const struct ovn_datapath
*od
,
1572 struct sset
*all_ips
, int *addr_family
)
1578 for (int i
= 0; i
< od
->nbr
->n_load_balancer
; i
++) {
1579 struct nbrec_load_balancer
*lb
= od
->nbr
->load_balancer
[i
];
1580 struct smap
*vips
= &lb
->vips
;
1581 struct smap_node
*node
;
1583 SMAP_FOR_EACH (node
, vips
) {
1584 /* node->key contains IP:port or just IP. */
1585 char *ip_address
= NULL
;
1588 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
1594 if (!sset_contains(all_ips
, ip_address
)) {
1595 sset_add(all_ips
, ip_address
);
1603 /* Returns an array of strings, each consisting of a MAC address followed
1604 * by one or more IP addresses, and if the port is a distributed gateway
1605 * port, followed by 'is_chassis_resident("LPORT_NAME")', where the
1606 * LPORT_NAME is the name of the L3 redirect port or the name of the
1607 * logical_port specified in a NAT rule. These strings include the
1608 * external IP addresses of all NAT rules defined on that router, and all
1609 * of the IP addresses used in load balancer VIPs defined on that router.
1611 * The caller must free each of the n returned strings with free(),
1612 * and must free the returned array when it is no longer needed. */
1614 get_nat_addresses(const struct ovn_port
*op
, size_t *n
)
1617 struct eth_addr mac
;
1618 if (!op
->nbrp
|| !op
->od
|| !op
->od
->nbr
1619 || (!op
->od
->nbr
->n_nat
&& !op
->od
->nbr
->n_load_balancer
)
1620 || !eth_addr_from_string(op
->nbrp
->mac
, &mac
)) {
1625 struct ds c_addresses
= DS_EMPTY_INITIALIZER
;
1626 ds_put_format(&c_addresses
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(mac
));
1627 bool central_ip_address
= false;
1630 addresses
= xmalloc(sizeof *addresses
* (op
->od
->nbr
->n_nat
+ 1));
1632 /* Get NAT IP addresses. */
1633 for (size_t i
= 0; i
< op
->od
->nbr
->n_nat
; i
++) {
1634 const struct nbrec_nat
*nat
= op
->od
->nbr
->nat
[i
];
1637 char *error
= ip_parse_masked(nat
->external_ip
, &ip
, &mask
);
1638 if (error
|| mask
!= OVS_BE32_MAX
) {
1643 /* Determine whether this NAT rule satisfies the conditions for
1644 * distributed NAT processing. */
1645 if (op
->od
->l3redirect_port
&& !strcmp(nat
->type
, "dnat_and_snat")
1646 && nat
->logical_port
&& nat
->external_mac
) {
1647 /* Distributed NAT rule. */
1648 if (eth_addr_from_string(nat
->external_mac
, &mac
)) {
1649 struct ds address
= DS_EMPTY_INITIALIZER
;
1650 ds_put_format(&address
, ETH_ADDR_FMT
, ETH_ADDR_ARGS(mac
));
1651 ds_put_format(&address
, " %s", nat
->external_ip
);
1652 ds_put_format(&address
, " is_chassis_resident(\"%s\")",
1654 addresses
[n_nats
++] = ds_steal_cstr(&address
);
1657 /* Centralized NAT rule, either on gateway router or distributed
1659 ds_put_format(&c_addresses
, " %s", nat
->external_ip
);
1660 central_ip_address
= true;
1664 /* A set to hold all load-balancer vips. */
1665 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
1667 get_router_load_balancer_ips(op
->od
, &all_ips
, &addr_family
);
1669 const char *ip_address
;
1670 SSET_FOR_EACH (ip_address
, &all_ips
) {
1671 ds_put_format(&c_addresses
, " %s", ip_address
);
1672 central_ip_address
= true;
1674 sset_destroy(&all_ips
);
1676 if (central_ip_address
) {
1677 /* Gratuitous ARP for centralized NAT rules on distributed gateway
1678 * ports should be restricted to the "redirect-chassis". */
1679 if (op
->od
->l3redirect_port
) {
1680 ds_put_format(&c_addresses
, " is_chassis_resident(%s)",
1681 op
->od
->l3redirect_port
->json_key
);
1684 addresses
[n_nats
++] = ds_steal_cstr(&c_addresses
);
1693 gateway_chassis_equal(const struct nbrec_gateway_chassis
*nb_gwc
,
1694 const struct sbrec_chassis
*nb_gwc_c
,
1695 const struct sbrec_gateway_chassis
*sb_gwc
)
1697 bool equal
= !strcmp(nb_gwc
->name
, sb_gwc
->name
)
1698 && nb_gwc
->priority
== sb_gwc
->priority
1699 && smap_equal(&nb_gwc
->options
, &sb_gwc
->options
)
1700 && smap_equal(&nb_gwc
->external_ids
, &sb_gwc
->external_ids
);
1706 /* If everything else matched and we were unable to find the SBDB
1707 * Chassis entry at this time, assume a match and return true.
1708 * This happens when an ovn-controller is restarting and the Chassis
1709 * entry is gone away momentarily */
1711 || (sb_gwc
->chassis
&& !strcmp(nb_gwc_c
->name
,
1712 sb_gwc
->chassis
->name
));
1716 sbpb_gw_chassis_needs_update(
1717 const struct sbrec_port_binding
*port_binding
,
1718 const struct nbrec_logical_router_port
*lrp
,
1719 const struct chassis_index
*chassis_index
)
1721 if (!lrp
|| !port_binding
) {
1725 /* These arrays are used to collect valid Gateway_Chassis and valid
1726 * Chassis records from the Logical_Router_Port Gateway_Chassis list,
1727 * we ignore the ones we can't match on the SBDB */
1728 struct nbrec_gateway_chassis
**lrp_gwc
= xzalloc(lrp
->n_gateway_chassis
*
1730 const struct sbrec_chassis
**lrp_gwc_c
= xzalloc(lrp
->n_gateway_chassis
*
1733 /* Count the number of gateway chassis chassis names from the logical
1734 * router port that we are able to match on the southbound database */
1735 int lrp_n_gateway_chassis
= 0;
1737 for (n
= 0; n
< lrp
->n_gateway_chassis
; n
++) {
1739 if (!lrp
->gateway_chassis
[n
]->chassis_name
) {
1743 const struct sbrec_chassis
*chassis
=
1744 chassis_lookup_by_name(chassis_index
,
1745 lrp
->gateway_chassis
[n
]->chassis_name
);
1747 lrp_gwc_c
[lrp_n_gateway_chassis
] = chassis
;
1748 lrp_gwc
[lrp_n_gateway_chassis
] = lrp
->gateway_chassis
[n
];
1749 lrp_n_gateway_chassis
++;
1751 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1753 &rl
, "Chassis name %s referenced in NBDB via Gateway_Chassis "
1754 "on logical router port %s does not exist in SBDB",
1755 lrp
->gateway_chassis
[n
]->chassis_name
, lrp
->name
);
1759 /* Basic check, different amount of Gateway_Chassis means that we
1760 * need to update southbound database Port_Binding */
1761 if (lrp_n_gateway_chassis
!= port_binding
->n_gateway_chassis
) {
1767 for (n
= 0; n
< lrp_n_gateway_chassis
; n
++) {
1769 /* For each of the valid gw chassis on the lrp, check if there's
1770 * a match on the Port_Binding list, we assume order is not
1772 for (i
= 0; i
< port_binding
->n_gateway_chassis
; i
++) {
1773 if (gateway_chassis_equal(lrp_gwc
[n
],
1775 port_binding
->gateway_chassis
[i
])) {
1776 break; /* we found a match */
1780 /* if no Port_Binding gateway chassis matched for the entry... */
1781 if (i
== port_binding
->n_gateway_chassis
) {
1784 return true; /* found no match for this gateway chassis on lrp */
1788 /* no need for update, all ports matched */
1794 /* This functions translates the gw chassis on the nb database
1795 * to sb database entries, the only difference is that SB database
1796 * Gateway_Chassis table references the chassis directly instead
1797 * of using the name */
1799 copy_gw_chassis_from_nbrp_to_sbpb(
1800 struct northd_context
*ctx
,
1801 const struct nbrec_logical_router_port
*lrp
,
1802 const struct chassis_index
*chassis_index
,
1803 const struct sbrec_port_binding
*port_binding
) {
1805 if (!lrp
|| !port_binding
|| !lrp
->n_gateway_chassis
) {
1809 struct sbrec_gateway_chassis
**gw_chassis
= NULL
;
1813 /* XXX: This can be improved. This code will generate a set of new
1814 * Gateway_Chassis and push them all in a single transaction, instead
1815 * this would be more optimal if we just add/update/remove the rows in
1816 * the southbound db that need to change. We don't expect lots of
1817 * changes to the Gateway_Chassis table, but if that proves to be wrong
1818 * we should optimize this. */
1819 for (n
= 0; n
< lrp
->n_gateway_chassis
; n
++) {
1820 struct nbrec_gateway_chassis
*lrp_gwc
= lrp
->gateway_chassis
[n
];
1821 if (!lrp_gwc
->chassis_name
) {
1825 const struct sbrec_chassis
*chassis
=
1826 chassis_lookup_by_name(chassis_index
, lrp_gwc
->chassis_name
);
1828 gw_chassis
= xrealloc(gw_chassis
, (n_gwc
+ 1) * sizeof *gw_chassis
);
1830 struct sbrec_gateway_chassis
*pb_gwc
=
1831 sbrec_gateway_chassis_insert(ctx
->ovnsb_txn
);
1833 sbrec_gateway_chassis_set_name(pb_gwc
, lrp_gwc
->name
);
1834 sbrec_gateway_chassis_set_priority(pb_gwc
, lrp_gwc
->priority
);
1835 sbrec_gateway_chassis_set_chassis(pb_gwc
, chassis
);
1836 sbrec_gateway_chassis_set_options(pb_gwc
, &lrp_gwc
->options
);
1837 sbrec_gateway_chassis_set_external_ids(pb_gwc
, &lrp_gwc
->external_ids
);
1839 gw_chassis
[n_gwc
++] = pb_gwc
;
1841 sbrec_port_binding_set_gateway_chassis(port_binding
, gw_chassis
, n_gwc
);
1846 ovn_port_update_sbrec(struct northd_context
*ctx
,
1847 const struct ovn_port
*op
,
1848 const struct chassis_index
*chassis_index
,
1849 struct hmap
*chassis_qdisc_queues
)
1851 sbrec_port_binding_set_datapath(op
->sb
, op
->od
->sb
);
1853 /* If the router is for l3 gateway, it resides on a chassis
1854 * and its port type is "l3gateway". */
1855 const char *chassis_name
= smap_get(&op
->od
->nbr
->options
, "chassis");
1857 sbrec_port_binding_set_type(op
->sb
, "chassisredirect");
1858 } else if (chassis_name
) {
1859 sbrec_port_binding_set_type(op
->sb
, "l3gateway");
1861 sbrec_port_binding_set_type(op
->sb
, "patch");
1867 const char *redirect_chassis
= smap_get(&op
->nbrp
->options
,
1868 "redirect-chassis");
1869 if (op
->nbrp
->n_gateway_chassis
&& redirect_chassis
) {
1870 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1872 &rl
, "logical router port %s has both options:"
1873 "redirect-chassis and gateway_chassis populated "
1874 "redirect-chassis will be ignored in favour of "
1875 "gateway chassis", op
->nbrp
->name
);
1878 if (op
->nbrp
->n_gateway_chassis
) {
1879 if (sbpb_gw_chassis_needs_update(op
->sb
, op
->nbrp
,
1881 copy_gw_chassis_from_nbrp_to_sbpb(ctx
, op
->nbrp
,
1882 chassis_index
, op
->sb
);
1885 } else if (redirect_chassis
) {
1886 /* Handle ports that had redirect-chassis option attached
1887 * to them, and for backwards compatibility convert them
1888 * to a single Gateway_Chassis entry */
1889 const struct sbrec_chassis
*chassis
=
1890 chassis_lookup_by_name(chassis_index
, redirect_chassis
);
1892 /* If we found the chassis, and the gw chassis on record
1893 * differs from what we expect go ahead and update */
1894 if (op
->sb
->n_gateway_chassis
!= 1
1895 || !op
->sb
->gateway_chassis
[0]->chassis
1896 || strcmp(op
->sb
->gateway_chassis
[0]->chassis
->name
,
1898 || op
->sb
->gateway_chassis
[0]->priority
!= 0) {
1899 /* Construct a single Gateway_Chassis entry on the
1900 * Port_Binding attached to the redirect_chassis
1902 struct sbrec_gateway_chassis
*gw_chassis
=
1903 sbrec_gateway_chassis_insert(ctx
->ovnsb_txn
);
1905 char *gwc_name
= xasprintf("%s_%s", op
->nbrp
->name
,
1908 /* XXX: Again, here, we could just update an existing
1909 * Gateway_Chassis, instead of creating a new one
1910 * and replacing it */
1911 sbrec_gateway_chassis_set_name(gw_chassis
, gwc_name
);
1912 sbrec_gateway_chassis_set_priority(gw_chassis
, 0);
1913 sbrec_gateway_chassis_set_chassis(gw_chassis
, chassis
);
1914 sbrec_gateway_chassis_set_external_ids(gw_chassis
,
1915 &op
->nbrp
->external_ids
);
1916 sbrec_port_binding_set_gateway_chassis(op
->sb
,
1921 VLOG_WARN("chassis name '%s' from redirect from logical "
1922 " router port '%s' redirect-chassis not found",
1923 redirect_chassis
, op
->nbrp
->name
);
1924 if (op
->sb
->n_gateway_chassis
) {
1925 sbrec_port_binding_set_gateway_chassis(op
->sb
, NULL
,
1930 smap_add(&new, "distributed-port", op
->nbrp
->name
);
1933 smap_add(&new, "peer", op
->peer
->key
);
1936 smap_add(&new, "l3gateway-chassis", chassis_name
);
1939 sbrec_port_binding_set_options(op
->sb
, &new);
1942 sbrec_port_binding_set_parent_port(op
->sb
, NULL
);
1943 sbrec_port_binding_set_tag(op
->sb
, NULL
, 0);
1944 sbrec_port_binding_set_mac(op
->sb
, NULL
, 0);
1946 struct smap ids
= SMAP_INITIALIZER(&ids
);
1947 sbrec_port_binding_set_external_ids(op
->sb
, &ids
);
1949 if (strcmp(op
->nbsp
->type
, "router")) {
1950 uint32_t queue_id
= smap_get_int(
1951 &op
->sb
->options
, "qdisc_queue_id", 0);
1952 bool has_qos
= port_has_qos_params(&op
->nbsp
->options
);
1953 struct smap options
;
1955 if (op
->sb
->chassis
&& has_qos
&& !queue_id
) {
1956 queue_id
= allocate_chassis_queueid(chassis_qdisc_queues
,
1958 } else if (!has_qos
&& queue_id
) {
1959 free_chassis_queueid(chassis_qdisc_queues
,
1965 smap_clone(&options
, &op
->nbsp
->options
);
1967 smap_add_format(&options
,
1968 "qdisc_queue_id", "%d", queue_id
);
1970 sbrec_port_binding_set_options(op
->sb
, &options
);
1971 smap_destroy(&options
);
1972 if (ovn_is_known_nb_lsp_type(op
->nbsp
->type
)) {
1973 sbrec_port_binding_set_type(op
->sb
, op
->nbsp
->type
);
1975 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
1977 &rl
, "Unknown port type '%s' set on logical switch '%s'.",
1978 op
->nbsp
->type
, op
->nbsp
->name
);
1981 const char *chassis
= NULL
;
1982 if (op
->peer
&& op
->peer
->od
&& op
->peer
->od
->nbr
) {
1983 chassis
= smap_get(&op
->peer
->od
->nbr
->options
, "chassis");
1986 /* A switch port connected to a gateway router is also of
1987 * type "l3gateway". */
1989 sbrec_port_binding_set_type(op
->sb
, "l3gateway");
1991 sbrec_port_binding_set_type(op
->sb
, "patch");
1994 const char *router_port
= smap_get(&op
->nbsp
->options
,
1996 if (router_port
|| chassis
) {
2000 smap_add(&new, "peer", router_port
);
2003 smap_add(&new, "l3gateway-chassis", chassis
);
2005 sbrec_port_binding_set_options(op
->sb
, &new);
2009 const char *nat_addresses
= smap_get(&op
->nbsp
->options
,
2011 if (nat_addresses
&& !strcmp(nat_addresses
, "router")) {
2012 if (op
->peer
&& op
->peer
->od
2013 && (chassis
|| op
->peer
->od
->l3redirect_port
)) {
2015 char **nats
= get_nat_addresses(op
->peer
, &n_nats
);
2017 sbrec_port_binding_set_nat_addresses(op
->sb
,
2018 (const char **) nats
, n_nats
);
2019 for (size_t i
= 0; i
< n_nats
; i
++) {
2024 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2027 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2029 /* Only accept manual specification of ethernet address
2030 * followed by IPv4 addresses on type "l3gateway" ports. */
2031 } else if (nat_addresses
&& chassis
) {
2032 struct lport_addresses laddrs
;
2033 if (!extract_lsp_addresses(nat_addresses
, &laddrs
)) {
2034 static struct vlog_rate_limit rl
=
2035 VLOG_RATE_LIMIT_INIT(1, 1);
2036 VLOG_WARN_RL(&rl
, "Error extracting nat-addresses.");
2037 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2039 sbrec_port_binding_set_nat_addresses(op
->sb
,
2041 destroy_lport_addresses(&laddrs
);
2044 sbrec_port_binding_set_nat_addresses(op
->sb
, NULL
, 0);
2047 sbrec_port_binding_set_parent_port(op
->sb
, op
->nbsp
->parent_name
);
2048 sbrec_port_binding_set_tag(op
->sb
, op
->nbsp
->tag
, op
->nbsp
->n_tag
);
2049 sbrec_port_binding_set_mac(op
->sb
, (const char **) op
->nbsp
->addresses
,
2050 op
->nbsp
->n_addresses
);
2052 struct smap ids
= SMAP_INITIALIZER(&ids
);
2053 smap_clone(&ids
, &op
->nbsp
->external_ids
);
2054 const char *name
= smap_get(&ids
, "neutron:port_name");
2055 if (name
&& name
[0]) {
2056 smap_add(&ids
, "name", name
);
2058 sbrec_port_binding_set_external_ids(op
->sb
, &ids
);
2063 /* Remove mac_binding entries that refer to logical_ports which are
2066 cleanup_mac_bindings(struct northd_context
*ctx
, struct hmap
*ports
)
2068 const struct sbrec_mac_binding
*b
, *n
;
2069 SBREC_MAC_BINDING_FOR_EACH_SAFE (b
, n
, ctx
->ovnsb_idl
) {
2070 if (!ovn_port_find(ports
, b
->logical_port
)) {
2071 sbrec_mac_binding_delete(b
);
2076 /* Updates the southbound Port_Binding table so that it contains the logical
2077 * switch ports specified by the northbound database.
2079 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
2080 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
2083 build_ports(struct northd_context
*ctx
, struct hmap
*datapaths
,
2084 const struct chassis_index
*chassis_index
, struct hmap
*ports
)
2086 struct ovs_list sb_only
, nb_only
, both
;
2087 struct hmap tag_alloc_table
= HMAP_INITIALIZER(&tag_alloc_table
);
2088 struct hmap chassis_qdisc_queues
= HMAP_INITIALIZER(&chassis_qdisc_queues
);
2090 join_logical_ports(ctx
, datapaths
, ports
, &chassis_qdisc_queues
,
2091 &tag_alloc_table
, &sb_only
, &nb_only
, &both
);
2093 struct ovn_port
*op
, *next
;
2094 /* For logical ports that are in both databases, update the southbound
2095 * record based on northbound data. Also index the in-use tunnel_keys.
2096 * For logical ports that are in NB database, do any tag allocation
2098 LIST_FOR_EACH_SAFE (op
, next
, list
, &both
) {
2100 tag_alloc_create_new_tag(&tag_alloc_table
, op
->nbsp
);
2102 ovn_port_update_sbrec(ctx
, op
, chassis_index
, &chassis_qdisc_queues
);
2104 add_tnlid(&op
->od
->port_tnlids
, op
->sb
->tunnel_key
);
2105 if (op
->sb
->tunnel_key
> op
->od
->port_key_hint
) {
2106 op
->od
->port_key_hint
= op
->sb
->tunnel_key
;
2110 /* Add southbound record for each unmatched northbound record. */
2111 LIST_FOR_EACH_SAFE (op
, next
, list
, &nb_only
) {
2112 uint16_t tunnel_key
= ovn_port_allocate_key(op
->od
);
2117 op
->sb
= sbrec_port_binding_insert(ctx
->ovnsb_txn
);
2118 ovn_port_update_sbrec(ctx
, op
, chassis_index
, &chassis_qdisc_queues
);
2120 sbrec_port_binding_set_logical_port(op
->sb
, op
->key
);
2121 sbrec_port_binding_set_tunnel_key(op
->sb
, tunnel_key
);
2124 bool remove_mac_bindings
= false;
2125 if (!ovs_list_is_empty(&sb_only
)) {
2126 remove_mac_bindings
= true;
2129 /* Delete southbound records without northbound matches. */
2130 LIST_FOR_EACH_SAFE(op
, next
, list
, &sb_only
) {
2131 ovs_list_remove(&op
->list
);
2132 sbrec_port_binding_delete(op
->sb
);
2133 ovn_port_destroy(ports
, op
);
2135 if (remove_mac_bindings
) {
2136 cleanup_mac_bindings(ctx
, ports
);
2139 tag_alloc_destroy(&tag_alloc_table
);
2140 destroy_chassis_queues(&chassis_qdisc_queues
);
2143 #define OVN_MIN_MULTICAST 32768
2144 #define OVN_MAX_MULTICAST 65535
2146 struct multicast_group
{
2148 uint16_t key
; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
2151 #define MC_FLOOD "_MC_flood"
2152 static const struct multicast_group mc_flood
= { MC_FLOOD
, 65535 };
2154 #define MC_UNKNOWN "_MC_unknown"
2155 static const struct multicast_group mc_unknown
= { MC_UNKNOWN
, 65534 };
2158 multicast_group_equal(const struct multicast_group
*a
,
2159 const struct multicast_group
*b
)
2161 return !strcmp(a
->name
, b
->name
) && a
->key
== b
->key
;
2164 /* Multicast group entry. */
2165 struct ovn_multicast
{
2166 struct hmap_node hmap_node
; /* Index on 'datapath' and 'key'. */
2167 struct ovn_datapath
*datapath
;
2168 const struct multicast_group
*group
;
2170 struct ovn_port
**ports
;
2171 size_t n_ports
, allocated_ports
;
2175 ovn_multicast_hash(const struct ovn_datapath
*datapath
,
2176 const struct multicast_group
*group
)
2178 return hash_pointer(datapath
, group
->key
);
2181 static struct ovn_multicast
*
2182 ovn_multicast_find(struct hmap
*mcgroups
, struct ovn_datapath
*datapath
,
2183 const struct multicast_group
*group
)
2185 struct ovn_multicast
*mc
;
2187 HMAP_FOR_EACH_WITH_HASH (mc
, hmap_node
,
2188 ovn_multicast_hash(datapath
, group
), mcgroups
) {
2189 if (mc
->datapath
== datapath
2190 && multicast_group_equal(mc
->group
, group
)) {
2198 ovn_multicast_add(struct hmap
*mcgroups
, const struct multicast_group
*group
,
2199 struct ovn_port
*port
)
2201 struct ovn_datapath
*od
= port
->od
;
2202 struct ovn_multicast
*mc
= ovn_multicast_find(mcgroups
, od
, group
);
2204 mc
= xmalloc(sizeof *mc
);
2205 hmap_insert(mcgroups
, &mc
->hmap_node
, ovn_multicast_hash(od
, group
));
2209 mc
->allocated_ports
= 4;
2210 mc
->ports
= xmalloc(mc
->allocated_ports
* sizeof *mc
->ports
);
2212 if (mc
->n_ports
>= mc
->allocated_ports
) {
2213 mc
->ports
= x2nrealloc(mc
->ports
, &mc
->allocated_ports
,
2216 mc
->ports
[mc
->n_ports
++] = port
;
2220 ovn_multicast_destroy(struct hmap
*mcgroups
, struct ovn_multicast
*mc
)
2223 hmap_remove(mcgroups
, &mc
->hmap_node
);
2230 ovn_multicast_update_sbrec(const struct ovn_multicast
*mc
,
2231 const struct sbrec_multicast_group
*sb
)
2233 struct sbrec_port_binding
**ports
= xmalloc(mc
->n_ports
* sizeof *ports
);
2234 for (size_t i
= 0; i
< mc
->n_ports
; i
++) {
2235 ports
[i
] = CONST_CAST(struct sbrec_port_binding
*, mc
->ports
[i
]->sb
);
2237 sbrec_multicast_group_set_ports(sb
, ports
, mc
->n_ports
);
2241 /* Logical flow generation.
2243 * This code generates the Logical_Flow table in the southbound database, as a
2244 * function of most of the northbound database.
2248 struct hmap_node hmap_node
;
2250 struct ovn_datapath
*od
;
2251 enum ovn_stage stage
;
2260 ovn_lflow_hash(const struct ovn_lflow
*lflow
)
2262 size_t hash
= uuid_hash(&lflow
->od
->key
);
2263 hash
= hash_2words((lflow
->stage
<< 16) | lflow
->priority
, hash
);
2264 hash
= hash_string(lflow
->match
, hash
);
2265 return hash_string(lflow
->actions
, hash
);
2269 ovn_lflow_equal(const struct ovn_lflow
*a
, const struct ovn_lflow
*b
)
2271 return (a
->od
== b
->od
2272 && a
->stage
== b
->stage
2273 && a
->priority
== b
->priority
2274 && !strcmp(a
->match
, b
->match
)
2275 && !strcmp(a
->actions
, b
->actions
));
2279 ovn_lflow_init(struct ovn_lflow
*lflow
, struct ovn_datapath
*od
,
2280 enum ovn_stage stage
, uint16_t priority
,
2281 char *match
, char *actions
, char *stage_hint
,
2285 lflow
->stage
= stage
;
2286 lflow
->priority
= priority
;
2287 lflow
->match
= match
;
2288 lflow
->actions
= actions
;
2289 lflow
->stage_hint
= stage_hint
;
2290 lflow
->where
= where
;
2293 /* Adds a row with the specified contents to the Logical_Flow table. */
2295 ovn_lflow_add_at(struct hmap
*lflow_map
, struct ovn_datapath
*od
,
2296 enum ovn_stage stage
, uint16_t priority
,
2297 const char *match
, const char *actions
,
2298 const char *stage_hint
, const char *where
)
2300 ovs_assert(ovn_stage_to_datapath_type(stage
) == ovn_datapath_get_type(od
));
2302 struct ovn_lflow
*lflow
= xmalloc(sizeof *lflow
);
2303 ovn_lflow_init(lflow
, od
, stage
, priority
,
2304 xstrdup(match
), xstrdup(actions
),
2305 nullable_xstrdup(stage_hint
), where
);
2306 hmap_insert(lflow_map
, &lflow
->hmap_node
, ovn_lflow_hash(lflow
));
/* Adds a row with the specified contents to the Logical_Flow table.
 * These wrap ovn_lflow_add_at() so every flow records the source location
 * (OVS_SOURCE_LOCATOR) that created it. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                            ACTIONS, NULL)
2319 static struct ovn_lflow
*
2320 ovn_lflow_find(struct hmap
*lflows
, struct ovn_datapath
*od
,
2321 enum ovn_stage stage
, uint16_t priority
,
2322 const char *match
, const char *actions
)
2324 struct ovn_lflow target
;
2325 ovn_lflow_init(&target
, od
, stage
, priority
,
2326 CONST_CAST(char *, match
), CONST_CAST(char *, actions
),
2329 struct ovn_lflow
*lflow
;
2330 HMAP_FOR_EACH_WITH_HASH (lflow
, hmap_node
, ovn_lflow_hash(&target
),
2332 if (ovn_lflow_equal(lflow
, &target
)) {
2340 ovn_lflow_destroy(struct hmap
*lflows
, struct ovn_lflow
*lflow
)
2343 hmap_remove(lflows
, &lflow
->hmap_node
);
2345 free(lflow
->actions
);
2346 free(lflow
->stage_hint
);
2351 /* Appends port security constraints on L2 address field 'eth_addr_field'
2352 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
2353 * elements, is the collection of port_security constraints from an
2354 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
2356 build_port_security_l2(const char *eth_addr_field
,
2357 struct lport_addresses
*ps_addrs
,
2358 unsigned int n_ps_addrs
,
2365 ds_put_format(match
, " && %s == {", eth_addr_field
);
2367 for (size_t i
= 0; i
< n_ps_addrs
; i
++) {
2368 ds_put_format(match
, "%s ", ps_addrs
[i
].ea_s
);
2370 ds_chomp(match
, ' ');
2371 ds_put_cstr(match
, "}");
2375 build_port_security_ipv6_nd_flow(
2376 struct ds
*match
, struct eth_addr ea
, struct ipv6_netaddr
*ipv6_addrs
,
2379 ds_put_format(match
, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT
" || "
2380 "nd.sll == "ETH_ADDR_FMT
") || ((nd.tll == "ETH_ADDR_FMT
" || "
2381 "nd.tll == "ETH_ADDR_FMT
")", ETH_ADDR_ARGS(eth_addr_zero
),
2382 ETH_ADDR_ARGS(ea
), ETH_ADDR_ARGS(eth_addr_zero
),
2384 if (!n_ipv6_addrs
) {
2385 ds_put_cstr(match
, "))");
2389 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
2390 struct in6_addr lla
;
2391 in6_generate_lla(ea
, &lla
);
2392 memset(ip6_str
, 0, sizeof(ip6_str
));
2393 ipv6_string_mapped(ip6_str
, &lla
);
2394 ds_put_format(match
, " && (nd.target == %s", ip6_str
);
2396 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
2397 memset(ip6_str
, 0, sizeof(ip6_str
));
2398 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
2399 ds_put_format(match
, " || nd.target == %s", ip6_str
);
2402 ds_put_format(match
, ")))");
2406 build_port_security_ipv6_flow(
2407 enum ovn_pipeline pipeline
, struct ds
*match
, struct eth_addr ea
,
2408 struct ipv6_netaddr
*ipv6_addrs
, int n_ipv6_addrs
)
2410 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
2412 ds_put_format(match
, " && %s == {",
2413 pipeline
== P_IN
? "ip6.src" : "ip6.dst");
2415 /* Allow link-local address. */
2416 struct in6_addr lla
;
2417 in6_generate_lla(ea
, &lla
);
2418 ipv6_string_mapped(ip6_str
, &lla
);
2419 ds_put_format(match
, "%s, ", ip6_str
);
2421 /* Allow ip6.dst=ff00::/8 for multicast packets */
2422 if (pipeline
== P_OUT
) {
2423 ds_put_cstr(match
, "ff00::/8, ");
2425 for(int i
= 0; i
< n_ipv6_addrs
; i
++) {
2426 ipv6_string_mapped(ip6_str
, &ipv6_addrs
[i
].addr
);
2427 ds_put_format(match
, "%s, ", ip6_str
);
2429 /* Replace ", " by "}". */
2430 ds_chomp(match
, ' ');
2431 ds_chomp(match
, ',');
2432 ds_put_cstr(match
, "}");
2436 * Build port security constraints on ARP and IPv6 ND fields
2437 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
2439 * For each port security of the logical port, following
2440 * logical flows are added
2441 * - If the port security has no IP (both IPv4 and IPv6) or
2442 * if it has IPv4 address(es)
2443 * - Priority 90 flow to allow ARP packets for known MAC addresses
2444 * in the eth.src and arp.spa fields. If the port security
2445 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
2447 * - If the port security has no IP (both IPv4 and IPv6) or
2448 * if it has IPv6 address(es)
2449 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
2450 * in the eth.src and nd.sll/nd.tll fields. If the port security
2451 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
2452 * for IPv6 Neighbor Advertisement packet.
2454 * - Priority 80 flow to drop ARP and IPv6 ND packets.
2457 build_port_security_nd(struct ovn_port
*op
, struct hmap
*lflows
)
2459 struct ds match
= DS_EMPTY_INITIALIZER
;
2461 for (size_t i
= 0; i
< op
->n_ps_addrs
; i
++) {
2462 struct lport_addresses
*ps
= &op
->ps_addrs
[i
];
2464 bool no_ip
= !(ps
->n_ipv4_addrs
|| ps
->n_ipv6_addrs
);
2467 if (ps
->n_ipv4_addrs
|| no_ip
) {
2468 ds_put_format(&match
,
2469 "inport == %s && eth.src == %s && arp.sha == %s",
2470 op
->json_key
, ps
->ea_s
, ps
->ea_s
);
2472 if (ps
->n_ipv4_addrs
) {
2473 ds_put_cstr(&match
, " && arp.spa == {");
2474 for (size_t j
= 0; j
< ps
->n_ipv4_addrs
; j
++) {
2475 /* When the netmask is applied, if the host portion is
2476 * non-zero, the host can only use the specified
2477 * address in the arp.spa. If zero, the host is allowed
2478 * to use any address in the subnet. */
2479 if (ps
->ipv4_addrs
[j
].plen
== 32
2480 || ps
->ipv4_addrs
[j
].addr
& ~ps
->ipv4_addrs
[j
].mask
) {
2481 ds_put_cstr(&match
, ps
->ipv4_addrs
[j
].addr_s
);
2483 ds_put_format(&match
, "%s/%d",
2484 ps
->ipv4_addrs
[j
].network_s
,
2485 ps
->ipv4_addrs
[j
].plen
);
2487 ds_put_cstr(&match
, ", ");
2489 ds_chomp(&match
, ' ');
2490 ds_chomp(&match
, ',');
2491 ds_put_cstr(&match
, "}");
2493 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
2494 ds_cstr(&match
), "next;");
2497 if (ps
->n_ipv6_addrs
|| no_ip
) {
2499 ds_put_format(&match
, "inport == %s && eth.src == %s",
2500 op
->json_key
, ps
->ea_s
);
2501 build_port_security_ipv6_nd_flow(&match
, ps
->ea
, ps
->ipv6_addrs
,
2503 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 90,
2504 ds_cstr(&match
), "next;");
2509 ds_put_format(&match
, "inport == %s && (arp || nd)", op
->json_key
);
2510 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_ND
, 80,
2511 ds_cstr(&match
), "drop;");
2516 * Build port security constraints on IPv4 and IPv6 src and dst fields
2517 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
2519 * For each port security of the logical port, following
2520 * logical flows are added
2521 * - If the port security has IPv4 addresses,
2522 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
2524 * - If the port security has IPv6 addresses,
2525 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
2527 * - If the port security has IPv4 addresses or IPv6 addresses or both
2528 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
2531 build_port_security_ip(enum ovn_pipeline pipeline
, struct ovn_port
*op
,
2532 struct hmap
*lflows
)
2534 char *port_direction
;
2535 enum ovn_stage stage
;
2536 if (pipeline
== P_IN
) {
2537 port_direction
= "inport";
2538 stage
= S_SWITCH_IN_PORT_SEC_IP
;
2540 port_direction
= "outport";
2541 stage
= S_SWITCH_OUT_PORT_SEC_IP
;
2544 for (size_t i
= 0; i
< op
->n_ps_addrs
; i
++) {
2545 struct lport_addresses
*ps
= &op
->ps_addrs
[i
];
2547 if (!(ps
->n_ipv4_addrs
|| ps
->n_ipv6_addrs
)) {
2551 if (ps
->n_ipv4_addrs
) {
2552 struct ds match
= DS_EMPTY_INITIALIZER
;
2553 if (pipeline
== P_IN
) {
2554 /* Permit use of the unspecified address for DHCP discovery */
2555 struct ds dhcp_match
= DS_EMPTY_INITIALIZER
;
2556 ds_put_format(&dhcp_match
, "inport == %s"
2558 " && ip4.src == 0.0.0.0"
2559 " && ip4.dst == 255.255.255.255"
2560 " && udp.src == 68 && udp.dst == 67",
2561 op
->json_key
, ps
->ea_s
);
2562 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
2563 ds_cstr(&dhcp_match
), "next;");
2564 ds_destroy(&dhcp_match
);
2565 ds_put_format(&match
, "inport == %s && eth.src == %s"
2566 " && ip4.src == {", op
->json_key
,
2569 ds_put_format(&match
, "outport == %s && eth.dst == %s"
2570 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
2571 op
->json_key
, ps
->ea_s
);
2574 for (int j
= 0; j
< ps
->n_ipv4_addrs
; j
++) {
2575 ovs_be32 mask
= ps
->ipv4_addrs
[j
].mask
;
2576 /* When the netmask is applied, if the host portion is
2577 * non-zero, the host can only use the specified
2578 * address. If zero, the host is allowed to use any
2579 * address in the subnet.
2581 if (ps
->ipv4_addrs
[j
].plen
== 32
2582 || ps
->ipv4_addrs
[j
].addr
& ~mask
) {
2583 ds_put_format(&match
, "%s", ps
->ipv4_addrs
[j
].addr_s
);
2584 if (pipeline
== P_OUT
&& ps
->ipv4_addrs
[j
].plen
!= 32) {
2585 /* Host is also allowed to receive packets to the
2586 * broadcast address in the specified subnet. */
2587 ds_put_format(&match
, ", %s",
2588 ps
->ipv4_addrs
[j
].bcast_s
);
2591 /* host portion is zero */
2592 ds_put_format(&match
, "%s/%d", ps
->ipv4_addrs
[j
].network_s
,
2593 ps
->ipv4_addrs
[j
].plen
);
2595 ds_put_cstr(&match
, ", ");
2598 /* Replace ", " by "}". */
2599 ds_chomp(&match
, ' ');
2600 ds_chomp(&match
, ',');
2601 ds_put_cstr(&match
, "}");
2602 ovn_lflow_add(lflows
, op
->od
, stage
, 90, ds_cstr(&match
), "next;");
2606 if (ps
->n_ipv6_addrs
) {
2607 struct ds match
= DS_EMPTY_INITIALIZER
;
2608 if (pipeline
== P_IN
) {
2609 /* Permit use of unspecified address for duplicate address
2611 struct ds dad_match
= DS_EMPTY_INITIALIZER
;
2612 ds_put_format(&dad_match
, "inport == %s"
2615 " && ip6.dst == ff02::/16"
2616 " && icmp6.type == {131, 135, 143}", op
->json_key
,
2618 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
2619 ds_cstr(&dad_match
), "next;");
2620 ds_destroy(&dad_match
);
2622 ds_put_format(&match
, "%s == %s && %s == %s",
2623 port_direction
, op
->json_key
,
2624 pipeline
== P_IN
? "eth.src" : "eth.dst", ps
->ea_s
);
2625 build_port_security_ipv6_flow(pipeline
, &match
, ps
->ea
,
2626 ps
->ipv6_addrs
, ps
->n_ipv6_addrs
);
2627 ovn_lflow_add(lflows
, op
->od
, stage
, 90,
2628 ds_cstr(&match
), "next;");
2632 char *match
= xasprintf("%s == %s && %s == %s && ip",
2633 port_direction
, op
->json_key
,
2634 pipeline
== P_IN
? "eth.src" : "eth.dst",
2636 ovn_lflow_add(lflows
, op
->od
, stage
, 80, match
, "drop;");
2643 lsp_is_enabled(const struct nbrec_logical_switch_port
*lsp
)
2645 return !lsp
->enabled
|| *lsp
->enabled
;
2649 lsp_is_up(const struct nbrec_logical_switch_port
*lsp
)
2651 return !lsp
->up
|| *lsp
->up
;
2655 build_dhcpv4_action(struct ovn_port
*op
, ovs_be32 offer_ip
,
2656 struct ds
*options_action
, struct ds
*response_action
,
2657 struct ds
*ipv4_addr_match
)
2659 if (!op
->nbsp
->dhcpv4_options
) {
2660 /* CMS has disabled native DHCPv4 for this lport. */
2664 ovs_be32 host_ip
, mask
;
2665 char *error
= ip_parse_masked(op
->nbsp
->dhcpv4_options
->cidr
, &host_ip
,
2667 if (error
|| ((offer_ip
^ host_ip
) & mask
)) {
2669 * - cidr defined is invalid or
2670 * - the offer ip of the logical port doesn't belong to the cidr
2671 * defined in the DHCPv4 options.
2677 const char *server_ip
= smap_get(
2678 &op
->nbsp
->dhcpv4_options
->options
, "server_id");
2679 const char *server_mac
= smap_get(
2680 &op
->nbsp
->dhcpv4_options
->options
, "server_mac");
2681 const char *lease_time
= smap_get(
2682 &op
->nbsp
->dhcpv4_options
->options
, "lease_time");
2684 if (!(server_ip
&& server_mac
&& lease_time
)) {
2685 /* "server_id", "server_mac" and "lease_time" should be
2686 * present in the dhcp_options. */
2687 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2688 VLOG_WARN_RL(&rl
, "Required DHCPv4 options not defined for lport - %s",
2693 struct smap dhcpv4_options
= SMAP_INITIALIZER(&dhcpv4_options
);
2694 smap_clone(&dhcpv4_options
, &op
->nbsp
->dhcpv4_options
->options
);
2696 /* server_mac is not DHCPv4 option, delete it from the smap. */
2697 smap_remove(&dhcpv4_options
, "server_mac");
2698 char *netmask
= xasprintf(IP_FMT
, IP_ARGS(mask
));
2699 smap_add(&dhcpv4_options
, "netmask", netmask
);
2702 ds_put_format(options_action
,
2703 REGBIT_DHCP_OPTS_RESULT
" = put_dhcp_opts(offerip = "
2704 IP_FMT
", ", IP_ARGS(offer_ip
));
2706 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2707 * options on different architectures (big or little endian, SSE4.2) */
2708 const struct smap_node
**sorted_opts
= smap_sort(&dhcpv4_options
);
2709 for (size_t i
= 0; i
< smap_count(&dhcpv4_options
); i
++) {
2710 const struct smap_node
*node
= sorted_opts
[i
];
2711 ds_put_format(options_action
, "%s = %s, ", node
->key
, node
->value
);
2715 ds_chomp(options_action
, ' ');
2716 ds_chomp(options_action
, ',');
2717 ds_put_cstr(options_action
, "); next;");
2719 ds_put_format(response_action
, "eth.dst = eth.src; eth.src = %s; "
2720 "ip4.dst = "IP_FMT
"; ip4.src = %s; udp.src = 67; "
2721 "udp.dst = 68; outport = inport; flags.loopback = 1; "
2723 server_mac
, IP_ARGS(offer_ip
), server_ip
);
2725 ds_put_format(ipv4_addr_match
,
2726 "ip4.src == "IP_FMT
" && ip4.dst == {%s, 255.255.255.255}",
2727 IP_ARGS(offer_ip
), server_ip
);
2728 smap_destroy(&dhcpv4_options
);
2733 build_dhcpv6_action(struct ovn_port
*op
, struct in6_addr
*offer_ip
,
2734 struct ds
*options_action
, struct ds
*response_action
)
2736 if (!op
->nbsp
->dhcpv6_options
) {
2737 /* CMS has disabled native DHCPv6 for this lport. */
2741 struct in6_addr host_ip
, mask
;
2743 char *error
= ipv6_parse_masked(op
->nbsp
->dhcpv6_options
->cidr
, &host_ip
,
2749 struct in6_addr ip6_mask
= ipv6_addr_bitxor(offer_ip
, &host_ip
);
2750 ip6_mask
= ipv6_addr_bitand(&ip6_mask
, &mask
);
2751 if (!ipv6_mask_is_any(&ip6_mask
)) {
2752 /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6
2757 const struct smap
*options_map
= &op
->nbsp
->dhcpv6_options
->options
;
2758 /* "server_id" should be the MAC address. */
2759 const char *server_mac
= smap_get(options_map
, "server_id");
2761 if (!server_mac
|| !eth_addr_from_string(server_mac
, &ea
)) {
2762 /* "server_id" should be present in the dhcpv6_options. */
2763 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
2764 VLOG_WARN_RL(&rl
, "server_id not present in the DHCPv6 options"
2765 " for lport %s", op
->json_key
);
2769 /* Get the link local IP of the DHCPv6 server from the server MAC. */
2770 struct in6_addr lla
;
2771 in6_generate_lla(ea
, &lla
);
2773 char server_ip
[INET6_ADDRSTRLEN
+ 1];
2774 ipv6_string_mapped(server_ip
, &lla
);
2776 char ia_addr
[INET6_ADDRSTRLEN
+ 1];
2777 ipv6_string_mapped(ia_addr
, offer_ip
);
2779 ds_put_format(options_action
,
2780 REGBIT_DHCP_OPTS_RESULT
" = put_dhcpv6_opts(");
2782 /* Check whether the dhcpv6 options should be configured as stateful.
2783 * Only reply with ia_addr option for dhcpv6 stateful address mode. */
2784 if (!smap_get_bool(options_map
, "dhcpv6_stateless", false)) {
2785 ipv6_string_mapped(ia_addr
, offer_ip
);
2786 ds_put_format(options_action
, "ia_addr = %s, ", ia_addr
);
2789 /* We're not using SMAP_FOR_EACH because we want a consistent order of the
2790 * options on different architectures (big or little endian, SSE4.2) */
2791 const struct smap_node
**sorted_opts
= smap_sort(options_map
);
2792 for (size_t i
= 0; i
< smap_count(options_map
); i
++) {
2793 const struct smap_node
*node
= sorted_opts
[i
];
2794 if (strcmp(node
->key
, "dhcpv6_stateless")) {
2795 ds_put_format(options_action
, "%s = %s, ", node
->key
, node
->value
);
2800 ds_chomp(options_action
, ' ');
2801 ds_chomp(options_action
, ',');
2802 ds_put_cstr(options_action
, "); next;");
2804 ds_put_format(response_action
, "eth.dst = eth.src; eth.src = %s; "
2805 "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; "
2806 "udp.dst = 546; outport = inport; flags.loopback = 1; "
2808 server_mac
, server_ip
);
2814 has_stateful_acl(struct ovn_datapath
*od
)
2816 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
2817 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
2818 if (!strcmp(acl
->action
, "allow-related")) {
2827 build_pre_acls(struct ovn_datapath
*od
, struct hmap
*lflows
)
2829 bool has_stateful
= has_stateful_acl(od
);
2831 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
2832 * allowed by default. */
2833 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 0, "1", "next;");
2834 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 0, "1", "next;");
2836 /* If there are any stateful ACL rules in this datapath, we must
2837 * send all IP packets through the conntrack action, which handles
2838 * defragmentation, in order to match L4 headers. */
2840 for (size_t i
= 0; i
< od
->n_router_ports
; i
++) {
2841 struct ovn_port
*op
= od
->router_ports
[i
];
2842 /* Can't use ct() for router ports. Consider the
2843 * following configuration: lp1(10.0.0.2) on
2844 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
2845 * ping from lp1 to lp2, First, the response will go
2846 * through ct() with a zone for lp2 in the ls2 ingress
2847 * pipeline on hostB. That ct zone knows about this
2848 * connection. Next, it goes through ct() with the zone
2849 * for the router port in the egress pipeline of ls2 on
2850 * hostB. This zone does not know about the connection,
2851 * as the icmp request went through the logical router
2852 * on hostA, not hostB. This would only work with
2853 * distributed conntrack state across all chassis. */
2854 struct ds match_in
= DS_EMPTY_INITIALIZER
;
2855 struct ds match_out
= DS_EMPTY_INITIALIZER
;
2857 ds_put_format(&match_in
, "ip && inport == %s", op
->json_key
);
2858 ds_put_format(&match_out
, "ip && outport == %s", op
->json_key
);
2859 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110,
2860 ds_cstr(&match_in
), "next;");
2861 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
2862 ds_cstr(&match_out
), "next;");
2864 ds_destroy(&match_in
);
2865 ds_destroy(&match_out
);
2867 if (od
->localnet_port
) {
2868 struct ds match_in
= DS_EMPTY_INITIALIZER
;
2869 struct ds match_out
= DS_EMPTY_INITIALIZER
;
2871 ds_put_format(&match_in
, "ip && inport == %s",
2872 od
->localnet_port
->json_key
);
2873 ds_put_format(&match_out
, "ip && outport == %s",
2874 od
->localnet_port
->json_key
);
2875 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110,
2876 ds_cstr(&match_in
), "next;");
2877 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
2878 ds_cstr(&match_out
), "next;");
2880 ds_destroy(&match_in
);
2881 ds_destroy(&match_out
);
2884 /* Ingress and Egress Pre-ACL Table (Priority 110).
2886 * Not to do conntrack on ND packets. */
2887 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110, "nd", "next;");
2888 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 110, "(nd_rs || nd_ra)",
2890 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110, "nd", "next;");
2891 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 110,
2892 "(nd_rs || nd_ra)", "next;");
2894 /* Ingress and Egress Pre-ACL Table (Priority 100).
2896 * Regardless of whether the ACL is "from-lport" or "to-lport",
2897 * we need rules in both the ingress and egress table, because
2898 * the return traffic needs to be followed.
2900 * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
2901 * it to conntrack for tracking and defragmentation. */
2902 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_ACL
, 100, "ip",
2903 REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
2904 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_ACL
, 100, "ip",
2905 REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
2909 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
2910 * 'ip_address'. The caller must free() the memory allocated for
2913 ip_address_and_port_from_lb_key(const char *key
, char **ip_address
,
2914 uint16_t *port
, int *addr_family
)
2916 struct sockaddr_storage ss
;
2917 char ip_addr_buf
[INET6_ADDRSTRLEN
];
2920 error
= ipv46_parse(key
, PORT_OPTIONAL
, &ss
);
2922 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
2923 VLOG_WARN_RL(&rl
, "bad ip address or port for load balancer key %s",
2929 if (ss
.ss_family
== AF_INET
) {
2930 struct sockaddr_in
*sin
= ALIGNED_CAST(struct sockaddr_in
*, &ss
);
2931 *port
= sin
->sin_port
== 0 ? 0 : ntohs(sin
->sin_port
);
2932 inet_ntop(AF_INET
, &sin
->sin_addr
, ip_addr_buf
, sizeof ip_addr_buf
);
2934 struct sockaddr_in6
*sin6
= ALIGNED_CAST(struct sockaddr_in6
*, &ss
);
2935 *port
= sin6
->sin6_port
== 0 ? 0 : ntohs(sin6
->sin6_port
);
2936 inet_ntop(AF_INET6
, &sin6
->sin6_addr
, ip_addr_buf
, sizeof ip_addr_buf
);
2939 *ip_address
= xstrdup(ip_addr_buf
);
2940 *addr_family
= ss
.ss_family
;
2944 * Returns true if logical switch is configured with DNS records, false
2948 ls_has_dns_records(const struct nbrec_logical_switch
*nbs
)
2950 for (size_t i
= 0; i
< nbs
->n_dns_records
; i
++) {
2951 if (!smap_is_empty(&nbs
->dns_records
[i
]->records
)) {
2960 build_pre_lb(struct ovn_datapath
*od
, struct hmap
*lflows
)
2962 /* Allow all packets to go to next tables by default. */
2963 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_LB
, 0, "1", "next;");
2964 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_LB
, 0, "1", "next;");
2966 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
2967 bool vip_configured
= false;
2968 int addr_family
= AF_INET
;
2969 for (int i
= 0; i
< od
->nbs
->n_load_balancer
; i
++) {
2970 struct nbrec_load_balancer
*lb
= od
->nbs
->load_balancer
[i
];
2971 struct smap
*vips
= &lb
->vips
;
2972 struct smap_node
*node
;
2974 SMAP_FOR_EACH (node
, vips
) {
2975 vip_configured
= true;
2977 /* node->key contains IP:port or just IP. */
2978 char *ip_address
= NULL
;
2980 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
2986 if (!sset_contains(&all_ips
, ip_address
)) {
2987 sset_add(&all_ips
, ip_address
);
2992 /* Ignore L4 port information in the key because fragmented packets
2993 * may not have L4 information. The pre-stateful table will send
2994 * the packet through ct() action to de-fragment. In stateful
2995 * table, we will eventually look at L4 information. */
2999 /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
3000 * packet to conntrack for defragmentation. */
3001 const char *ip_address
;
3002 SSET_FOR_EACH(ip_address
, &all_ips
) {
3005 if (addr_family
== AF_INET
) {
3006 match
= xasprintf("ip && ip4.dst == %s", ip_address
);
3008 match
= xasprintf("ip && ip6.dst == %s", ip_address
);
3010 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_LB
,
3011 100, match
, REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
3015 sset_destroy(&all_ips
);
3017 if (vip_configured
) {
3018 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_LB
,
3019 100, "ip", REGBIT_CONNTRACK_DEFRAG
" = 1; next;");
3024 build_pre_stateful(struct ovn_datapath
*od
, struct hmap
*lflows
)
3026 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
3027 * allowed by default. */
3028 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_STATEFUL
, 0, "1", "next;");
3029 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_STATEFUL
, 0, "1", "next;");
3031 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
3032 * sent to conntrack for tracking and defragmentation. */
3033 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PRE_STATEFUL
, 100,
3034 REGBIT_CONNTRACK_DEFRAG
" == 1", "ct_next;");
3035 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PRE_STATEFUL
, 100,
3036 REGBIT_CONNTRACK_DEFRAG
" == 1", "ct_next;");
3040 build_acl_log(struct ds
*actions
, const struct nbrec_acl
*acl
)
3046 ds_put_cstr(actions
, "log(");
3049 ds_put_format(actions
, "name=\"%s\", ", acl
->name
);
3052 /* If a severity level isn't specified, default to "info". */
3053 if (acl
->severity
) {
3054 ds_put_format(actions
, "severity=%s, ", acl
->severity
);
3056 ds_put_format(actions
, "severity=info, ");
3059 if (!strcmp(acl
->action
, "drop")) {
3060 ds_put_cstr(actions
, "verdict=drop, ");
3061 } else if (!strcmp(acl
->action
, "reject")) {
3062 ds_put_cstr(actions
, "verdict=reject, ");
3063 } else if (!strcmp(acl
->action
, "allow")
3064 || !strcmp(acl
->action
, "allow-related")) {
3065 ds_put_cstr(actions
, "verdict=allow, ");
3068 ds_chomp(actions
, ' ');
3069 ds_chomp(actions
, ',');
3070 ds_put_cstr(actions
, "); ");
3074 build_acls(struct ovn_datapath
*od
, struct hmap
*lflows
)
3076 bool has_stateful
= has_stateful_acl(od
);
3078 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
3079 * default. A related rule at priority 1 is added below if there
3080 * are any stateful ACLs in this datapath. */
3081 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 0, "1", "next;");
3082 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 0, "1", "next;");
3085 /* Ingress and Egress ACL Table (Priority 1).
3087 * By default, traffic is allowed. This is partially handled by
3088 * the Priority 0 ACL flows added earlier, but we also need to
3089 * commit IP flows. This is because, while the initiater's
3090 * direction may not have any stateful rules, the server's may
3091 * and then its return traffic would not have an associated
3092 * conntrack entry and would return "+invalid".
3094 * We use "ct_commit" for a connection that is not already known
3095 * by the connection tracker. Once a connection is committed,
3096 * subsequent packets will hit the flow at priority 0 that just
3099 * We also check for established connections that have ct_label.blocked
3100 * set on them. That's a connection that was disallowed, but is
3101 * now allowed by policy again since it hit this default-allow flow.
3102 * We need to set ct_label.blocked=0 to let the connection continue,
3103 * which will be done by ct_commit() in the "stateful" stage.
3104 * Subsequent packets will hit the flow at priority 0 that just
3106 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, 1,
3107 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3108 REGBIT_CONNTRACK_COMMIT
" = 1; next;");
3109 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, 1,
3110 "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
3111 REGBIT_CONNTRACK_COMMIT
" = 1; next;");
3113 /* Ingress and Egress ACL Table (Priority 65535).
3115 * Always drop traffic that's in an invalid state. Also drop
3116 * reply direction packets for connections that have been marked
3117 * for deletion (bit 0 of ct_label is set).
3119 * This is enforced at a higher priority than ACLs can be defined. */
3120 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
3121 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3123 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
3124 "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
3127 /* Ingress and Egress ACL Table (Priority 65535).
3129 * Allow reply traffic that is part of an established
3130 * conntrack entry that has not been marked for deletion
3131 * (bit 0 of ct_label). We only match traffic in the
3132 * reply direction because we want traffic in the request
3133 * direction to hit the currently defined policy from ACLs.
3135 * This is enforced at a higher priority than ACLs can be defined. */
3136 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
3137 "ct.est && !ct.rel && !ct.new && !ct.inv "
3138 "&& ct.rpl && ct_label.blocked == 0",
3140 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
3141 "ct.est && !ct.rel && !ct.new && !ct.inv "
3142 "&& ct.rpl && ct_label.blocked == 0",
3145 /* Ingress and Egress ACL Table (Priority 65535).
3147 * Allow traffic that is related to an existing conntrack entry that
3148 * has not been marked for deletion (bit 0 of ct_label).
3150 * This is enforced at a higher priority than ACLs can be defined.
3152 * NOTE: This does not support related data sessions (eg,
3153 * a dynamically negotiated FTP data channel), but will allow
3154 * related traffic such as an ICMP Port Unreachable through
3155 * that's generated from a non-listening UDP port. */
3156 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
,
3157 "!ct.est && ct.rel && !ct.new && !ct.inv "
3158 "&& ct_label.blocked == 0",
3160 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
,
3161 "!ct.est && ct.rel && !ct.new && !ct.inv "
3162 "&& ct_label.blocked == 0",
3165 /* Ingress and Egress ACL Table (Priority 65535).
3167 * Not to do conntrack on ND packets. */
3168 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ACL
, UINT16_MAX
, "nd", "next;");
3169 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_ACL
, UINT16_MAX
, "nd", "next;");
3172 /* Ingress or Egress ACL Table (Various priorities). */
3173 for (size_t i
= 0; i
< od
->nbs
->n_acls
; i
++) {
3174 struct nbrec_acl
*acl
= od
->nbs
->acls
[i
];
3175 bool ingress
= !strcmp(acl
->direction
, "from-lport") ? true :false;
3176 enum ovn_stage stage
= ingress
? S_SWITCH_IN_ACL
: S_SWITCH_OUT_ACL
;
3178 char *stage_hint
= xasprintf("%08x", acl
->header_
.uuid
.parts
[0]);
3179 if (!strcmp(acl
->action
, "allow")
3180 || !strcmp(acl
->action
, "allow-related")) {
3181 /* If there are any stateful flows, we must even commit "allow"
3182 * actions. This is because, while the initiater's
3183 * direction may not have any stateful rules, the server's
3184 * may and then its return traffic would not have an
3185 * associated conntrack entry and would return "+invalid". */
3186 if (!has_stateful
) {
3187 struct ds actions
= DS_EMPTY_INITIALIZER
;
3188 build_acl_log(&actions
, acl
);
3189 ds_put_cstr(&actions
, "next;");
3190 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3191 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3192 acl
->match
, ds_cstr(&actions
),
3194 ds_destroy(&actions
);
3196 struct ds match
= DS_EMPTY_INITIALIZER
;
3197 struct ds actions
= DS_EMPTY_INITIALIZER
;
3199 /* Commit the connection tracking entry if it's a new
3200 * connection that matches this ACL. After this commit,
3201 * the reply traffic is allowed by a flow we create at
3202 * priority 65535, defined earlier.
3204 * It's also possible that a known connection was marked for
3205 * deletion after a policy was deleted, but the policy was
3206 * re-added while that connection is still known. We catch
3207 * that case here and un-set ct_label.blocked (which will be done
3208 * by ct_commit in the "stateful" stage) to indicate that the
3209 * connection should be allowed to resume.
3211 ds_put_format(&match
, "((ct.new && !ct.est)"
3212 " || (!ct.new && ct.est && !ct.rpl "
3213 "&& ct_label.blocked == 1)) "
3214 "&& (%s)", acl
->match
);
3215 ds_put_cstr(&actions
, REGBIT_CONNTRACK_COMMIT
" = 1; ");
3216 build_acl_log(&actions
, acl
);
3217 ds_put_cstr(&actions
, "next;");
3218 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3219 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3224 /* Match on traffic in the request direction for an established
3225 * connection tracking entry that has not been marked for
3226 * deletion. There is no need to commit here, so we can just
3227 * proceed to the next table. We use this to ensure that this
3228 * connection is still allowed by the currently defined
3232 ds_put_format(&match
,
3233 "!ct.new && ct.est && !ct.rpl"
3234 " && ct_label.blocked == 0 && (%s)",
3237 build_acl_log(&actions
, acl
);
3238 ds_put_cstr(&actions
, "next;");
3239 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3240 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3241 ds_cstr(&match
), ds_cstr(&actions
),
3245 ds_destroy(&actions
);
3247 } else if (!strcmp(acl
->action
, "drop")
3248 || !strcmp(acl
->action
, "reject")) {
3249 struct ds match
= DS_EMPTY_INITIALIZER
;
3250 struct ds actions
= DS_EMPTY_INITIALIZER
;
3252 /* XXX Need to support "reject", treat it as "drop;" for now. */
3253 if (!strcmp(acl
->action
, "reject")) {
3254 VLOG_INFO("reject is not a supported action");
3257 /* The implementation of "drop" differs if stateful ACLs are in
3258 * use for this datapath. In that case, the actions differ
3259 * depending on whether the connection was previously committed
3260 * to the connection tracker with ct_commit. */
3262 /* If the packet is not part of an established connection, then
3263 * we can simply drop it. */
3264 ds_put_format(&match
,
3265 "(!ct.est || (ct.est && ct_label.blocked == 1)) "
3269 build_acl_log(&actions
, acl
);
3270 ds_put_cstr(&actions
, "/* drop */");
3271 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3272 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3273 ds_cstr(&match
), ds_cstr(&actions
),
3276 /* For an existing connection without ct_label set, we've
3277 * encountered a policy change. ACLs previously allowed
3278 * this connection and we committed the connection tracking
3279 * entry. Current policy says that we should drop this
3280 * connection. First, we set bit 0 of ct_label to indicate
3281 * that this connection is set for deletion. By not
3282 * specifying "next;", we implicitly drop the packet after
3283 * updating conntrack state. We would normally defer
3284 * ct_commit() to the "stateful" stage, but since we're
3285 * dropping the packet, we go ahead and do it here. */
3288 ds_put_format(&match
,
3289 "ct.est && ct_label.blocked == 0 && (%s)",
3291 ds_put_cstr(&actions
, "ct_commit(ct_label=1/1); ");
3292 build_acl_log(&actions
, acl
);
3293 ds_put_cstr(&actions
, "/* drop */");
3294 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3295 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3296 ds_cstr(&match
), ds_cstr(&actions
),
3300 /* There are no stateful ACLs in use on this datapath,
3301 * so a "drop" ACL is simply the "drop" logical flow action
3304 build_acl_log(&actions
, acl
);
3305 ds_put_cstr(&actions
, "/* drop */");
3306 ovn_lflow_add_with_hint(lflows
, od
, stage
,
3307 acl
->priority
+ OVN_ACL_PRI_OFFSET
,
3308 acl
->match
, ds_cstr(&actions
),
3312 ds_destroy(&actions
);
3317 /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
3318 * logical ports of the datapath if the CMS has configured DHCPv4 options.
3320 for (size_t i
= 0; i
< od
->nbs
->n_ports
; i
++) {
3321 if (od
->nbs
->ports
[i
]->dhcpv4_options
) {
3322 const char *server_id
= smap_get(
3323 &od
->nbs
->ports
[i
]->dhcpv4_options
->options
, "server_id");
3324 const char *server_mac
= smap_get(
3325 &od
->nbs
->ports
[i
]->dhcpv4_options
->options
, "server_mac");
3326 const char *lease_time
= smap_get(
3327 &od
->nbs
->ports
[i
]->dhcpv4_options
->options
, "lease_time");
3328 if (server_id
&& server_mac
&& lease_time
) {
3329 struct ds match
= DS_EMPTY_INITIALIZER
;
3330 const char *actions
=
3331 has_stateful
? "ct_commit; next;" : "next;";
3332 ds_put_format(&match
, "outport == \"%s\" && eth.src == %s "
3333 "&& ip4.src == %s && udp && udp.src == 67 "
3334 "&& udp.dst == 68", od
->nbs
->ports
[i
]->name
,
3335 server_mac
, server_id
);
3337 lflows
, od
, S_SWITCH_OUT_ACL
, 34000, ds_cstr(&match
),
3343 if (od
->nbs
->ports
[i
]->dhcpv6_options
) {
3344 const char *server_mac
= smap_get(
3345 &od
->nbs
->ports
[i
]->dhcpv6_options
->options
, "server_id");
3347 if (server_mac
&& eth_addr_from_string(server_mac
, &ea
)) {
3348 /* Get the link local IP of the DHCPv6 server from the
3350 struct in6_addr lla
;
3351 in6_generate_lla(ea
, &lla
);
3353 char server_ip
[INET6_ADDRSTRLEN
+ 1];
3354 ipv6_string_mapped(server_ip
, &lla
);
3356 struct ds match
= DS_EMPTY_INITIALIZER
;
3357 const char *actions
= has_stateful
? "ct_commit; next;" :
3359 ds_put_format(&match
, "outport == \"%s\" && eth.src == %s "
3360 "&& ip6.src == %s && udp && udp.src == 547 "
3361 "&& udp.dst == 546", od
->nbs
->ports
[i
]->name
,
3362 server_mac
, server_ip
);
3364 lflows
, od
, S_SWITCH_OUT_ACL
, 34000, ds_cstr(&match
),
3371 /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
3372 * if the CMS has configured DNS records for the datapath.
3374 if (ls_has_dns_records(od
->nbs
)) {
3375 const char *actions
= has_stateful
? "ct_commit; next;" : "next;";
3377 lflows
, od
, S_SWITCH_OUT_ACL
, 34000, "udp.src == 53",
3383 build_qos(struct ovn_datapath
*od
, struct hmap
*lflows
) {
3384 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_QOS_MARK
, 0, "1", "next;");
3385 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_QOS_MARK
, 0, "1", "next;");
3387 for (size_t i
= 0; i
< od
->nbs
->n_qos_rules
; i
++) {
3388 struct nbrec_qos
*qos
= od
->nbs
->qos_rules
[i
];
3389 bool ingress
= !strcmp(qos
->direction
, "from-lport") ? true :false;
3390 enum ovn_stage stage
= ingress
? S_SWITCH_IN_QOS_MARK
: S_SWITCH_OUT_QOS_MARK
;
3392 if (!strcmp(qos
->key_action
, "dscp")) {
3393 struct ds dscp_action
= DS_EMPTY_INITIALIZER
;
3395 ds_put_format(&dscp_action
, "ip.dscp = %d; next;",
3396 (uint8_t)qos
->value_action
);
3397 ovn_lflow_add(lflows
, od
, stage
,
3399 qos
->match
, ds_cstr(&dscp_action
));
3400 ds_destroy(&dscp_action
);
3406 build_lb(struct ovn_datapath
*od
, struct hmap
*lflows
)
3408 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
3410 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_LB
, 0, "1", "next;");
3411 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_LB
, 0, "1", "next;");
3413 if (od
->nbs
->load_balancer
) {
3414 /* Ingress and Egress LB Table (Priority 65535).
3416 * Send established traffic through conntrack for just NAT. */
3417 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_LB
, UINT16_MAX
,
3418 "ct.est && !ct.rel && !ct.new && !ct.inv",
3419 REGBIT_CONNTRACK_NAT
" = 1; next;");
3420 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_LB
, UINT16_MAX
,
3421 "ct.est && !ct.rel && !ct.new && !ct.inv",
3422 REGBIT_CONNTRACK_NAT
" = 1; next;");
3427 build_stateful(struct ovn_datapath
*od
, struct hmap
*lflows
)
3429 /* Ingress and Egress stateful Table (Priority 0): Packets are
3430 * allowed by default. */
3431 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
, 0, "1", "next;");
3432 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_STATEFUL
, 0, "1", "next;");
3434 /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
3435 * committed to conntrack. We always set ct_label.blocked to 0 here as
3436 * any packet that makes it this far is part of a connection we
3437 * want to allow to continue. */
3438 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
, 100,
3439 REGBIT_CONNTRACK_COMMIT
" == 1", "ct_commit(ct_label=0/1); next;");
3440 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_STATEFUL
, 100,
3441 REGBIT_CONNTRACK_COMMIT
" == 1", "ct_commit(ct_label=0/1); next;");
3443 /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
3444 * through nat (without committing).
3446 * REGBIT_CONNTRACK_COMMIT is set for new connections and
3447 * REGBIT_CONNTRACK_NAT is set for established connections. So they
3450 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
, 100,
3451 REGBIT_CONNTRACK_NAT
" == 1", "ct_lb;");
3452 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_STATEFUL
, 100,
3453 REGBIT_CONNTRACK_NAT
" == 1", "ct_lb;");
3455 /* Load balancing rules for new connections get committed to conntrack
3456 * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
3457 * a higher priority rule for load balancing below also commits the
3458 * connection, so it is okay if we do not hit the above match on
3459 * REGBIT_CONNTRACK_COMMIT. */
3460 for (int i
= 0; i
< od
->nbs
->n_load_balancer
; i
++) {
3461 struct nbrec_load_balancer
*lb
= od
->nbs
->load_balancer
[i
];
3462 struct smap
*vips
= &lb
->vips
;
3463 struct smap_node
*node
;
3465 SMAP_FOR_EACH (node
, vips
) {
3469 /* node->key contains IP:port or just IP. */
3470 char *ip_address
= NULL
;
3471 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
3477 /* New connections in Ingress table. */
3478 char *action
= xasprintf("ct_lb(%s);", node
->value
);
3479 struct ds match
= DS_EMPTY_INITIALIZER
;
3480 if (addr_family
== AF_INET
) {
3481 ds_put_format(&match
, "ct.new && ip4.dst == %s", ip_address
);
3483 ds_put_format(&match
, "ct.new && ip6.dst == %s", ip_address
);
3486 if (lb
->protocol
&& !strcmp(lb
->protocol
, "udp")) {
3487 ds_put_format(&match
, " && udp.dst == %d", port
);
3489 ds_put_format(&match
, " && tcp.dst == %d", port
);
3491 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
,
3492 120, ds_cstr(&match
), action
);
3494 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_STATEFUL
,
3495 110, ds_cstr(&match
), action
);
3506 build_lswitch_flows(struct hmap
*datapaths
, struct hmap
*ports
,
3507 struct hmap
*lflows
, struct hmap
*mcgroups
)
3509 /* This flow table structure is documented in ovn-northd(8), so please
3510 * update ovn-northd.8.xml if you change anything. */
3512 struct ds match
= DS_EMPTY_INITIALIZER
;
3513 struct ds actions
= DS_EMPTY_INITIALIZER
;
3515 /* Build pre-ACL and ACL tables for both ingress and egress.
3516 * Ingress tables 3 through 9. Egress tables 0 through 6. */
3517 struct ovn_datapath
*od
;
3518 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3523 build_pre_acls(od
, lflows
);
3524 build_pre_lb(od
, lflows
);
3525 build_pre_stateful(od
, lflows
);
3526 build_acls(od
, lflows
);
3527 build_qos(od
, lflows
);
3528 build_lb(od
, lflows
);
3529 build_stateful(od
, lflows
);
3532 /* Logical switch ingress table 0: Admission control framework (priority
3534 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3539 /* Logical VLANs not supported. */
3540 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "vlan.present",
3543 /* Broadcast/multicast source address is invalid. */
3544 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_L2
, 100, "eth.src[40]",
3547 /* Port security flows have priority 50 (see below) and will continue
3548 * to the next table if packet source is acceptable. */
3551 /* Logical switch ingress table 0: Ingress port security - L2
3553 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
3554 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
3556 struct ovn_port
*op
;
3557 HMAP_FOR_EACH (op
, key_node
, ports
) {
3562 if (!lsp_is_enabled(op
->nbsp
)) {
3563 /* Drop packets from disabled logical ports (since logical flow
3564 * tables are default-drop). */
3570 ds_put_format(&match
, "inport == %s", op
->json_key
);
3571 build_port_security_l2("eth.src", op
->ps_addrs
, op
->n_ps_addrs
,
3574 const char *queue_id
= smap_get(&op
->sb
->options
, "qdisc_queue_id");
3576 ds_put_format(&actions
, "set_queue(%s); ", queue_id
);
3578 ds_put_cstr(&actions
, "next;");
3579 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_PORT_SEC_L2
, 50,
3580 ds_cstr(&match
), ds_cstr(&actions
));
3582 if (op
->nbsp
->n_port_security
) {
3583 build_port_security_ip(P_IN
, op
, lflows
);
3584 build_port_security_nd(op
, lflows
);
3588 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
3590 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3595 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_ND
, 0, "1", "next;");
3596 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_PORT_SEC_IP
, 0, "1", "next;");
3599 /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
3600 * and vtep ports. (priority 100); see ovn-northd.8.xml for the
3602 HMAP_FOR_EACH (op
, key_node
, ports
) {
3607 if ((!strcmp(op
->nbsp
->type
, "localnet")) ||
3608 (!strcmp(op
->nbsp
->type
, "vtep"))) {
3610 ds_put_format(&match
, "inport == %s", op
->json_key
);
3611 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 100,
3612 ds_cstr(&match
), "next;");
3616 /* Ingress table 10: ARP/ND responder, reply for known IPs.
3618 HMAP_FOR_EACH (op
, key_node
, ports
) {
3624 * Add ARP/ND reply flows if either the
3626 * - port type is router or
3627 * - port type is localport
3629 if (!lsp_is_up(op
->nbsp
) && strcmp(op
->nbsp
->type
, "router") &&
3630 strcmp(op
->nbsp
->type
, "localport")) {
3634 for (size_t i
= 0; i
< op
->n_lsp_addrs
; i
++) {
3635 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv4_addrs
; j
++) {
3637 ds_put_format(&match
, "arp.tpa == %s && arp.op == 1",
3638 op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr_s
);
3640 ds_put_format(&actions
,
3641 "eth.dst = eth.src; "
3643 "arp.op = 2; /* ARP reply */ "
3644 "arp.tha = arp.sha; "
3646 "arp.tpa = arp.spa; "
3648 "outport = inport; "
3649 "flags.loopback = 1; "
3651 op
->lsp_addrs
[i
].ea_s
, op
->lsp_addrs
[i
].ea_s
,
3652 op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr_s
);
3653 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 50,
3654 ds_cstr(&match
), ds_cstr(&actions
));
3656 /* Do not reply to an ARP request from the port that owns the
3657 * address (otherwise a DHCP client that ARPs to check for a
3658 * duplicate address will fail). Instead, forward it the usual
3661 * (Another alternative would be to simply drop the packet. If
3662 * everything is working as it is configured, then this would
3663 * produce equivalent results, since no one should reply to the
3664 * request. But ARPing for one's own IP address is intended to
3665 * detect situations where the network is not working as
3666 * configured, so dropping the request would frustrate that
3668 ds_put_format(&match
, " && inport == %s", op
->json_key
);
3669 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 100,
3670 ds_cstr(&match
), "next;");
3673 /* For ND solicitations, we need to listen for both the
3674 * unicast IPv6 address and its all-nodes multicast address,
3675 * but always respond with the unicast IPv6 address. */
3676 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv6_addrs
; j
++) {
3678 ds_put_format(&match
,
3679 "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
3680 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
,
3681 op
->lsp_addrs
[i
].ipv6_addrs
[j
].sn_addr_s
,
3682 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
);
3685 ds_put_format(&actions
,
3691 "outport = inport; "
3692 "flags.loopback = 1; "
3695 op
->lsp_addrs
[i
].ea_s
,
3696 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
,
3697 op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
,
3698 op
->lsp_addrs
[i
].ea_s
);
3699 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 50,
3700 ds_cstr(&match
), ds_cstr(&actions
));
3702 /* Do not reply to a solicitation from the port that owns the
3703 * address (otherwise DAD detection will fail). */
3704 ds_put_format(&match
, " && inport == %s", op
->json_key
);
3705 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_ARP_ND_RSP
, 100,
3706 ds_cstr(&match
), "next;");
3711 /* Ingress table 10: ARP/ND responder, by default goto next.
3713 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3718 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_ARP_ND_RSP
, 0, "1", "next;");
3721 /* Logical switch ingress table 11 and 12: DHCP options and response
3722 * priority 100 flows. */
3723 HMAP_FOR_EACH (op
, key_node
, ports
) {
3728 if (!lsp_is_enabled(op
->nbsp
) || !strcmp(op
->nbsp
->type
, "router")) {
3729 /* Don't add the DHCP flows if the port is not enabled or if the
3730 * port is a router port. */
3734 if (!op
->nbsp
->dhcpv4_options
&& !op
->nbsp
->dhcpv6_options
) {
3735 /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
3740 for (size_t i
= 0; i
< op
->n_lsp_addrs
; i
++) {
3741 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv4_addrs
; j
++) {
3742 struct ds options_action
= DS_EMPTY_INITIALIZER
;
3743 struct ds response_action
= DS_EMPTY_INITIALIZER
;
3744 struct ds ipv4_addr_match
= DS_EMPTY_INITIALIZER
;
3745 if (build_dhcpv4_action(
3746 op
, op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr
,
3747 &options_action
, &response_action
, &ipv4_addr_match
)) {
3750 &match
, "inport == %s && eth.src == %s && "
3751 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
3752 "udp.src == 68 && udp.dst == 67", op
->json_key
,
3753 op
->lsp_addrs
[i
].ea_s
);
3755 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_OPTIONS
,
3756 100, ds_cstr(&match
),
3757 ds_cstr(&options_action
));
3759 /* Allow ip4.src = OFFER_IP and
3760 * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
3762 * - When the client wants to renew the IP by sending
3763 * the DHCPREQUEST to the server ip.
3764 * - When the client wants to renew the IP by
3765 * broadcasting the DHCPREQUEST.
3768 &match
, "inport == %s && eth.src == %s && "
3769 "%s && udp.src == 68 && udp.dst == 67", op
->json_key
,
3770 op
->lsp_addrs
[i
].ea_s
, ds_cstr(&ipv4_addr_match
));
3772 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_OPTIONS
,
3773 100, ds_cstr(&match
),
3774 ds_cstr(&options_action
));
3777 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
3778 * put_dhcp_opts action is successful. */
3780 &match
, "inport == %s && eth.src == %s && "
3781 "ip4 && udp.src == 68 && udp.dst == 67"
3782 " && "REGBIT_DHCP_OPTS_RESULT
, op
->json_key
,
3783 op
->lsp_addrs
[i
].ea_s
);
3784 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_RESPONSE
,
3785 100, ds_cstr(&match
),
3786 ds_cstr(&response_action
));
3787 ds_destroy(&options_action
);
3788 ds_destroy(&response_action
);
3789 ds_destroy(&ipv4_addr_match
);
3794 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv6_addrs
; j
++) {
3795 struct ds options_action
= DS_EMPTY_INITIALIZER
;
3796 struct ds response_action
= DS_EMPTY_INITIALIZER
;
3797 if (build_dhcpv6_action(
3798 op
, &op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr
,
3799 &options_action
, &response_action
)) {
3802 &match
, "inport == %s && eth.src == %s"
3803 " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
3804 " udp.dst == 547", op
->json_key
,
3805 op
->lsp_addrs
[i
].ea_s
);
3807 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_OPTIONS
, 100,
3808 ds_cstr(&match
), ds_cstr(&options_action
));
3810 /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
3811 * put_dhcpv6_opts action is successful */
3812 ds_put_cstr(&match
, " && "REGBIT_DHCP_OPTS_RESULT
);
3813 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_DHCP_RESPONSE
, 100,
3814 ds_cstr(&match
), ds_cstr(&response_action
));
3815 ds_destroy(&options_action
);
3816 ds_destroy(&response_action
);
3823 /* Logical switch ingress table 13 and 14: DNS lookup and response
3824 * priority 100 flows.
3826 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3827 if (!od
->nbs
|| !ls_has_dns_records(od
->nbs
)) {
3831 struct ds action
= DS_EMPTY_INITIALIZER
;
3834 ds_put_cstr(&match
, "udp.dst == 53");
3835 ds_put_format(&action
,
3836 REGBIT_DNS_LOOKUP_RESULT
" = dns_lookup(); next;");
3837 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_LOOKUP
, 100,
3838 ds_cstr(&match
), ds_cstr(&action
));
3840 ds_put_cstr(&match
, " && "REGBIT_DNS_LOOKUP_RESULT
);
3841 ds_put_format(&action
, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; "
3842 "udp.dst = udp.src; udp.src = 53; outport = inport; "
3843 "flags.loopback = 1; output;");
3844 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_RESPONSE
, 100,
3845 ds_cstr(&match
), ds_cstr(&action
));
3847 ds_put_format(&action
, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; "
3848 "udp.dst = udp.src; udp.src = 53; outport = inport; "
3849 "flags.loopback = 1; output;");
3850 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_RESPONSE
, 100,
3851 ds_cstr(&match
), ds_cstr(&action
));
3852 ds_destroy(&action
);
3855 /* Ingress table 11 and 12: DHCP options and response, by default goto next.
3857 * Ingress table 13 and 14: DNS lookup and response, by default goto next.
3860 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3865 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DHCP_OPTIONS
, 0, "1", "next;");
3866 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DHCP_RESPONSE
, 0, "1", "next;");
3867 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_LOOKUP
, 0, "1", "next;");
3868 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_DNS_RESPONSE
, 0, "1", "next;");
3871 /* Ingress table 15: Destination lookup, broadcast and multicast handling
3872 * (priority 100). */
3873 HMAP_FOR_EACH (op
, key_node
, ports
) {
3878 if (lsp_is_enabled(op
->nbsp
)) {
3879 ovn_multicast_add(mcgroups
, &mc_flood
, op
);
3882 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3887 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 100, "eth.mcast",
3888 "outport = \""MC_FLOOD
"\"; output;");
3891 /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
3892 HMAP_FOR_EACH (op
, key_node
, ports
) {
3897 for (size_t i
= 0; i
< op
->nbsp
->n_addresses
; i
++) {
3898 /* Addresses are owned by the logical port.
3899 * Ethernet address followed by zero or more IPv4
3900 * or IPv6 addresses (or both). */
3901 struct eth_addr mac
;
3902 if (ovs_scan(op
->nbsp
->addresses
[i
],
3903 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(mac
))) {
3905 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
3906 ETH_ADDR_ARGS(mac
));
3909 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
3910 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
3911 ds_cstr(&match
), ds_cstr(&actions
));
3912 } else if (!strcmp(op
->nbsp
->addresses
[i
], "unknown")) {
3913 if (lsp_is_enabled(op
->nbsp
)) {
3914 ovn_multicast_add(mcgroups
, &mc_unknown
, op
);
3915 op
->od
->has_unknown
= true;
3917 } else if (is_dynamic_lsp_address(op
->nbsp
->addresses
[i
])) {
3918 if (!op
->nbsp
->dynamic_addresses
3919 || !ovs_scan(op
->nbsp
->dynamic_addresses
,
3920 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(mac
))) {
3924 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
3925 ETH_ADDR_ARGS(mac
));
3928 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
3929 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
3930 ds_cstr(&match
), ds_cstr(&actions
));
3931 } else if (!strcmp(op
->nbsp
->addresses
[i
], "router")) {
3932 if (!op
->peer
|| !op
->peer
->nbrp
3933 || !ovs_scan(op
->peer
->nbrp
->mac
,
3934 ETH_ADDR_SCAN_FMT
, ETH_ADDR_SCAN_ARGS(mac
))) {
3938 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
,
3939 ETH_ADDR_ARGS(mac
));
3940 if (op
->peer
->od
->l3dgw_port
3941 && op
->peer
== op
->peer
->od
->l3dgw_port
3942 && op
->peer
->od
->l3redirect_port
) {
3943 /* The destination lookup flow for the router's
3944 * distributed gateway port MAC address should only be
3945 * programmed on the "redirect-chassis". */
3946 ds_put_format(&match
, " && is_chassis_resident(%s)",
3947 op
->peer
->od
->l3redirect_port
->json_key
);
3951 ds_put_format(&actions
, "outport = %s; output;", op
->json_key
);
3952 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
, 50,
3953 ds_cstr(&match
), ds_cstr(&actions
));
3955 /* Add ethernet addresses specified in NAT rules on
3956 * distributed logical routers. */
3957 if (op
->peer
->od
->l3dgw_port
3958 && op
->peer
== op
->peer
->od
->l3dgw_port
) {
3959 for (int j
= 0; j
< op
->peer
->od
->nbr
->n_nat
; j
++) {
3960 const struct nbrec_nat
*nat
3961 = op
->peer
->od
->nbr
->nat
[j
];
3962 if (!strcmp(nat
->type
, "dnat_and_snat")
3963 && nat
->logical_port
&& nat
->external_mac
3964 && eth_addr_from_string(nat
->external_mac
, &mac
)) {
3967 ds_put_format(&match
, "eth.dst == "ETH_ADDR_FMT
3968 " && is_chassis_resident(\"%s\")",
3973 ds_put_format(&actions
, "outport = %s; output;",
3975 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_IN_L2_LKUP
,
3976 50, ds_cstr(&match
),
3982 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
3985 "%s: invalid syntax '%s' in addresses column",
3986 op
->nbsp
->name
, op
->nbsp
->addresses
[i
]);
3991 /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
3992 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
3997 if (od
->has_unknown
) {
3998 ovn_lflow_add(lflows
, od
, S_SWITCH_IN_L2_LKUP
, 0, "1",
3999 "outport = \""MC_UNKNOWN
"\"; output;");
4003 /* Egress tables 6: Egress port security - IP (priority 0)
4004 * Egress table 7: Egress port security L2 - multicast/broadcast
4005 * (priority 100). */
4006 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4011 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_IP
, 0, "1", "next;");
4012 ovn_lflow_add(lflows
, od
, S_SWITCH_OUT_PORT_SEC_L2
, 100, "eth.mcast",
4016 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
4017 * if port security enabled.
4019 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
4021 * Priority 50 rules implement port security for enabled logical port.
4023 * Priority 150 rules drop packets to disabled logical ports, so that they
4024 * don't even receive multicast or broadcast packets. */
4025 HMAP_FOR_EACH (op
, key_node
, ports
) {
4031 ds_put_format(&match
, "outport == %s", op
->json_key
);
4032 if (lsp_is_enabled(op
->nbsp
)) {
4033 build_port_security_l2("eth.dst", op
->ps_addrs
, op
->n_ps_addrs
,
4035 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 50,
4036 ds_cstr(&match
), "output;");
4038 ovn_lflow_add(lflows
, op
->od
, S_SWITCH_OUT_PORT_SEC_L2
, 150,
4039 ds_cstr(&match
), "drop;");
4042 if (op
->nbsp
->n_port_security
) {
4043 build_port_security_ip(P_OUT
, op
, lflows
);
4048 ds_destroy(&actions
);
4052 lrport_is_enabled(const struct nbrec_logical_router_port
*lrport
)
4054 return !lrport
->enabled
|| *lrport
->enabled
;
4057 /* Returns a string of the IP address of the router port 'op' that
4058 * overlaps with 'ip_s". If one is not found, returns NULL.
4060 * The caller must not free the returned string. */
4062 find_lrp_member_ip(const struct ovn_port
*op
, const char *ip_s
)
4064 bool is_ipv4
= strchr(ip_s
, '.') ? true : false;
4069 if (!ip_parse(ip_s
, &ip
)) {
4070 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4071 VLOG_WARN_RL(&rl
, "bad ip address %s", ip_s
);
4075 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4076 const struct ipv4_netaddr
*na
= &op
->lrp_networks
.ipv4_addrs
[i
];
4078 if (!((na
->network
^ ip
) & na
->mask
)) {
4079 /* There should be only 1 interface that matches the
4080 * supplied IP. Otherwise, it's a configuration error,
4081 * because subnets of a router's interfaces should NOT
4087 struct in6_addr ip6
;
4089 if (!ipv6_parse(ip_s
, &ip6
)) {
4090 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4091 VLOG_WARN_RL(&rl
, "bad ipv6 address %s", ip_s
);
4095 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
4096 const struct ipv6_netaddr
*na
= &op
->lrp_networks
.ipv6_addrs
[i
];
4097 struct in6_addr xor_addr
= ipv6_addr_bitxor(&na
->network
, &ip6
);
4098 struct in6_addr and_addr
= ipv6_addr_bitand(&xor_addr
, &na
->mask
);
4100 if (ipv6_is_zero(&and_addr
)) {
4101 /* There should be only 1 interface that matches the
4102 * supplied IP. Otherwise, it's a configuration error,
4103 * because subnets of a router's interfaces should NOT
4114 add_route(struct hmap
*lflows
, const struct ovn_port
*op
,
4115 const char *lrp_addr_s
, const char *network_s
, int plen
,
4116 const char *gateway
, const char *policy
)
4118 bool is_ipv4
= strchr(network_s
, '.') ? true : false;
4119 struct ds match
= DS_EMPTY_INITIALIZER
;
4123 if (policy
&& !strcmp(policy
, "src-ip")) {
4125 priority
= plen
* 2;
4128 priority
= (plen
* 2) + 1;
4131 /* IPv6 link-local addresses must be scoped to the local router port. */
4133 struct in6_addr network
;
4134 ovs_assert(ipv6_parse(network_s
, &network
));
4135 if (in6_is_lla(&network
)) {
4136 ds_put_format(&match
, "inport == %s && ", op
->json_key
);
4139 ds_put_format(&match
, "ip%s.%s == %s/%d", is_ipv4
? "4" : "6", dir
,
4142 struct ds actions
= DS_EMPTY_INITIALIZER
;
4143 ds_put_format(&actions
, "ip.ttl--; %sreg0 = ", is_ipv4
? "" : "xx");
4146 ds_put_cstr(&actions
, gateway
);
4148 ds_put_format(&actions
, "ip%s.dst", is_ipv4
? "4" : "6");
4150 ds_put_format(&actions
, "; "
4154 "flags.loopback = 1; "
4156 is_ipv4
? "" : "xx",
4158 op
->lrp_networks
.ea_s
,
4161 /* The priority here is calculated to implement longest-prefix-match
4163 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_ROUTING
, priority
,
4164 ds_cstr(&match
), ds_cstr(&actions
));
4166 ds_destroy(&actions
);
4170 build_static_route_flow(struct hmap
*lflows
, struct ovn_datapath
*od
,
4172 const struct nbrec_logical_router_static_route
*route
)
4175 const char *lrp_addr_s
= NULL
;
4179 /* Verify that the next hop is an IP address with an all-ones mask. */
4180 char *error
= ip_parse_cidr(route
->nexthop
, &nexthop
, &plen
);
4183 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4184 VLOG_WARN_RL(&rl
, "bad next hop mask %s", route
->nexthop
);
4191 struct in6_addr ip6
;
4192 error
= ipv6_parse_cidr(route
->nexthop
, &ip6
, &plen
);
4195 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4196 VLOG_WARN_RL(&rl
, "bad next hop mask %s", route
->nexthop
);
4201 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4202 VLOG_WARN_RL(&rl
, "bad next hop ip address %s", route
->nexthop
);
4211 /* Verify that ip prefix is a valid IPv4 address. */
4212 error
= ip_parse_cidr(route
->ip_prefix
, &prefix
, &plen
);
4214 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4215 VLOG_WARN_RL(&rl
, "bad 'ip_prefix' in static routes %s",
4220 prefix_s
= xasprintf(IP_FMT
, IP_ARGS(prefix
& be32_prefix_mask(plen
)));
4222 /* Verify that ip prefix is a valid IPv6 address. */
4223 struct in6_addr prefix
;
4224 error
= ipv6_parse_cidr(route
->ip_prefix
, &prefix
, &plen
);
4226 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4227 VLOG_WARN_RL(&rl
, "bad 'ip_prefix' in static routes %s",
4232 struct in6_addr mask
= ipv6_create_mask(plen
);
4233 struct in6_addr network
= ipv6_addr_bitand(&prefix
, &mask
);
4234 prefix_s
= xmalloc(INET6_ADDRSTRLEN
);
4235 inet_ntop(AF_INET6
, &network
, prefix_s
, INET6_ADDRSTRLEN
);
4238 /* Find the outgoing port. */
4239 struct ovn_port
*out_port
= NULL
;
4240 if (route
->output_port
) {
4241 out_port
= ovn_port_find(ports
, route
->output_port
);
4243 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4244 VLOG_WARN_RL(&rl
, "Bad out port %s for static route %s",
4245 route
->output_port
, route
->ip_prefix
);
4248 lrp_addr_s
= find_lrp_member_ip(out_port
, route
->nexthop
);
4250 /* There are no IP networks configured on the router's port via
4251 * which 'route->nexthop' is theoretically reachable. But since
4252 * 'out_port' has been specified, we honor it by trying to reach
4253 * 'route->nexthop' via the first IP address of 'out_port'.
4254 * (There are cases, e.g in GCE, where each VM gets a /32 IP
4255 * address and the default gateway is still reachable from it.) */
4257 if (out_port
->lrp_networks
.n_ipv4_addrs
) {
4258 lrp_addr_s
= out_port
->lrp_networks
.ipv4_addrs
[0].addr_s
;
4261 if (out_port
->lrp_networks
.n_ipv6_addrs
) {
4262 lrp_addr_s
= out_port
->lrp_networks
.ipv6_addrs
[0].addr_s
;
4267 /* output_port is not specified, find the
4268 * router port matching the next hop. */
4270 for (i
= 0; i
< od
->nbr
->n_ports
; i
++) {
4271 struct nbrec_logical_router_port
*lrp
= od
->nbr
->ports
[i
];
4272 out_port
= ovn_port_find(ports
, lrp
->name
);
4274 /* This should not happen. */
4278 lrp_addr_s
= find_lrp_member_ip(out_port
, route
->nexthop
);
4285 if (!out_port
|| !lrp_addr_s
) {
4286 /* There is no matched out port. */
4287 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4288 VLOG_WARN_RL(&rl
, "No path for static route %s; next hop %s",
4289 route
->ip_prefix
, route
->nexthop
);
4293 char *policy
= route
->policy
? route
->policy
: "dst-ip";
4294 add_route(lflows
, out_port
, lrp_addr_s
, prefix_s
, plen
, route
->nexthop
,
4302 op_put_v4_networks(struct ds
*ds
, const struct ovn_port
*op
, bool add_bcast
)
4304 if (!add_bcast
&& op
->lrp_networks
.n_ipv4_addrs
== 1) {
4305 ds_put_format(ds
, "%s", op
->lrp_networks
.ipv4_addrs
[0].addr_s
);
4309 ds_put_cstr(ds
, "{");
4310 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4311 ds_put_format(ds
, "%s, ", op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
4313 ds_put_format(ds
, "%s, ", op
->lrp_networks
.ipv4_addrs
[i
].bcast_s
);
4318 ds_put_cstr(ds
, "}");
4322 op_put_v6_networks(struct ds
*ds
, const struct ovn_port
*op
)
4324 if (op
->lrp_networks
.n_ipv6_addrs
== 1) {
4325 ds_put_format(ds
, "%s", op
->lrp_networks
.ipv6_addrs
[0].addr_s
);
4329 ds_put_cstr(ds
, "{");
4330 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
4331 ds_put_format(ds
, "%s, ", op
->lrp_networks
.ipv6_addrs
[i
].addr_s
);
4335 ds_put_cstr(ds
, "}");
4339 get_force_snat_ip(struct ovn_datapath
*od
, const char *key_type
, ovs_be32
*ip
)
4341 char *key
= xasprintf("%s_force_snat_ip", key_type
);
4342 const char *ip_address
= smap_get(&od
->nbr
->options
, key
);
4347 char *error
= ip_parse_masked(ip_address
, ip
, &mask
);
4348 if (error
|| mask
!= OVS_BE32_MAX
) {
4349 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4350 VLOG_WARN_RL(&rl
, "bad ip %s in options of router "UUID_FMT
"",
4351 ip_address
, UUID_ARGS(&od
->key
));
4364 add_router_lb_flow(struct hmap
*lflows
, struct ovn_datapath
*od
,
4365 struct ds
*match
, struct ds
*actions
, int priority
,
4366 const char *lb_force_snat_ip
, char *backend_ips
,
4367 bool is_udp
, int addr_family
)
4369 /* A match and actions for new connections. */
4370 char *new_match
= xasprintf("ct.new && %s", ds_cstr(match
));
4371 if (lb_force_snat_ip
) {
4372 char *new_actions
= xasprintf("flags.force_snat_for_lb = 1; %s",
4374 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, new_match
,
4378 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, new_match
,
4382 /* A match and actions for established connections. */
4383 char *est_match
= xasprintf("ct.est && %s", ds_cstr(match
));
4384 if (lb_force_snat_ip
) {
4385 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, est_match
,
4386 "flags.force_snat_for_lb = 1; ct_dnat;");
4388 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, priority
, est_match
,
4395 if (!od
->l3dgw_port
|| !od
->l3redirect_port
|| !backend_ips
4396 || addr_family
!= AF_INET
) {
4400 /* Add logical flows to UNDNAT the load balanced reverse traffic in
4401 * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical
4402 * router has a gateway router port associated.
4404 struct ds undnat_match
= DS_EMPTY_INITIALIZER
;
4405 ds_put_cstr(&undnat_match
, "ip4 && (");
4406 char *start
, *next
, *ip_str
;
4407 start
= next
= xstrdup(backend_ips
);
4408 ip_str
= strsep(&next
, ",");
4409 bool backend_ips_found
= false;
4410 while (ip_str
&& ip_str
[0]) {
4411 char *ip_address
= NULL
;
4414 ip_address_and_port_from_lb_key(ip_str
, &ip_address
, &port
,
4420 ds_put_format(&undnat_match
, "(ip4.src == %s", ip_address
);
4423 ds_put_format(&undnat_match
, " && %s.src == %d) || ",
4424 is_udp
? "udp" : "tcp", port
);
4426 ds_put_cstr(&undnat_match
, ") || ");
4428 ip_str
= strsep(&next
, ",");
4429 backend_ips_found
= true;
4433 if (!backend_ips_found
) {
4434 ds_destroy(&undnat_match
);
4437 ds_chomp(&undnat_match
, ' ');
4438 ds_chomp(&undnat_match
, '|');
4439 ds_chomp(&undnat_match
, '|');
4440 ds_chomp(&undnat_match
, ' ');
4441 ds_put_format(&undnat_match
, ") && outport == %s && "
4442 "is_chassis_resident(%s)", od
->l3dgw_port
->json_key
,
4443 od
->l3redirect_port
->json_key
);
4444 if (lb_force_snat_ip
) {
4445 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 120,
4446 ds_cstr(&undnat_match
),
4447 "flags.force_snat_for_lb = 1; ct_dnat;");
4449 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 120,
4450 ds_cstr(&undnat_match
), "ct_dnat;");
4453 ds_destroy(&undnat_match
);
4457 build_lrouter_flows(struct hmap
*datapaths
, struct hmap
*ports
,
4458 struct hmap
*lflows
)
4460 /* This flow table structure is documented in ovn-northd(8), so please
4461 * update ovn-northd.8.xml if you change anything. */
4463 struct ds match
= DS_EMPTY_INITIALIZER
;
4464 struct ds actions
= DS_EMPTY_INITIALIZER
;
4466 /* Logical router ingress table 0: Admission control framework. */
4467 struct ovn_datapath
*od
;
4468 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4473 /* Logical VLANs not supported.
4474 * Broadcast/multicast source address is invalid. */
4475 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ADMISSION
, 100,
4476 "vlan.present || eth.src[40]", "drop;");
4479 /* Logical router ingress table 0: match (priority 50). */
4480 struct ovn_port
*op
;
4481 HMAP_FOR_EACH (op
, key_node
, ports
) {
4486 if (!lrport_is_enabled(op
->nbrp
)) {
4487 /* Drop packets from disabled logical ports (since logical flow
4488 * tables are default-drop). */
4493 /* No ingress packets should be received on a chassisredirect
4499 ds_put_format(&match
, "eth.mcast && inport == %s", op
->json_key
);
4500 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ADMISSION
, 50,
4501 ds_cstr(&match
), "next;");
4504 ds_put_format(&match
, "eth.dst == %s && inport == %s",
4505 op
->lrp_networks
.ea_s
, op
->json_key
);
4506 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
4507 && op
->od
->l3redirect_port
) {
4508 /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
4509 * should only be received on the "redirect-chassis". */
4510 ds_put_format(&match
, " && is_chassis_resident(%s)",
4511 op
->od
->l3redirect_port
->json_key
);
4513 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ADMISSION
, 50,
4514 ds_cstr(&match
), "next;");
4517 /* Logical router ingress table 1: IP Input. */
4518 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4523 /* L3 admission control: drop multicast and broadcast source, localhost
4524 * source or destination, and zero network source or destination
4525 * (priority 100). */
4526 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 100,
4528 "ip4.src == 255.255.255.255 || "
4529 "ip4.src == 127.0.0.0/8 || "
4530 "ip4.dst == 127.0.0.0/8 || "
4531 "ip4.src == 0.0.0.0/8 || "
4532 "ip4.dst == 0.0.0.0/8",
4535 /* ARP reply handling. Use ARP replies to populate the logical
4536 * router's ARP table. */
4537 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 90, "arp.op == 2",
4538 "put_arp(inport, arp.spa, arp.sha);");
4540 /* Drop Ethernet local broadcast. By definition this traffic should
4541 * not be forwarded.*/
4542 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 50,
4543 "eth.bcast", "drop;");
4547 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
4549 ds_put_cstr(&match
, "ip4 && ip.ttl == {0, 1}");
4550 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 30,
4551 ds_cstr(&match
), "drop;");
4553 /* ND advertisement handling. Use advertisements to populate
4554 * the logical router's ARP/ND table. */
4555 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 90, "nd_na",
4556 "put_nd(inport, nd.target, nd.tll);");
4558 /* Lean from neighbor solicitations that were not directed at
4559 * us. (A priority-90 flow will respond to requests to us and
4560 * learn the sender's mac address. */
4561 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 80, "nd_ns",
4562 "put_nd(inport, ip6.src, nd.sll);");
4564 /* Pass other traffic not already handled to the next table for
4566 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_INPUT
, 0, "1", "next;");
4569 /* Logical router ingress table 1: IP Input for IPv4. */
4570 HMAP_FOR_EACH (op
, key_node
, ports
) {
4576 /* No ingress packets are accepted on a chassisredirect
4577 * port, so no need to program flows for that port. */
4581 if (op
->lrp_networks
.n_ipv4_addrs
) {
4582 /* L3 admission control: drop packets that originate from an
4583 * IPv4 address owned by the router or a broadcast address
4584 * known to the router (priority 100). */
4586 ds_put_cstr(&match
, "ip4.src == ");
4587 op_put_v4_networks(&match
, op
, true);
4588 ds_put_cstr(&match
, " && "REGBIT_EGRESS_LOOPBACK
" == 0");
4589 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 100,
4590 ds_cstr(&match
), "drop;");
4592 /* ICMP echo reply. These flows reply to ICMP echo requests
4593 * received for the router's IP address. Since packets only
4594 * get here as part of the logical router datapath, the inport
4595 * (i.e. the incoming locally attached net) does not matter.
4596 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
4598 ds_put_cstr(&match
, "ip4.dst == ");
4599 op_put_v4_networks(&match
, op
, false);
4600 ds_put_cstr(&match
, " && icmp4.type == 8 && icmp4.code == 0");
4603 ds_put_format(&actions
,
4604 "ip4.dst <-> ip4.src; "
4607 "flags.loopback = 1; "
4609 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4610 ds_cstr(&match
), ds_cstr(&actions
));
4613 /* ARP reply. These flows reply to ARP requests for the router's own
4615 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4617 ds_put_format(&match
,
4618 "inport == %s && arp.tpa == %s && arp.op == 1",
4619 op
->json_key
, op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
4620 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
4621 && op
->od
->l3redirect_port
) {
4622 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4623 * should only be sent from the "redirect-chassis", so that
4624 * upstream MAC learning points to the "redirect-chassis".
4625 * Also need to avoid generation of multiple ARP responses
4626 * from different chassis. */
4627 ds_put_format(&match
, " && is_chassis_resident(%s)",
4628 op
->od
->l3redirect_port
->json_key
);
4632 ds_put_format(&actions
,
4633 "eth.dst = eth.src; "
4635 "arp.op = 2; /* ARP reply */ "
4636 "arp.tha = arp.sha; "
4638 "arp.tpa = arp.spa; "
4641 "flags.loopback = 1; "
4643 op
->lrp_networks
.ea_s
,
4644 op
->lrp_networks
.ea_s
,
4645 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
,
4647 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4648 ds_cstr(&match
), ds_cstr(&actions
));
4651 /* A set to hold all load-balancer vips that need ARP responses. */
4652 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
4654 get_router_load_balancer_ips(op
->od
, &all_ips
, &addr_family
);
4656 const char *ip_address
;
4657 SSET_FOR_EACH(ip_address
, &all_ips
) {
4659 if (addr_family
== AF_INET
) {
4660 ds_put_format(&match
,
4661 "inport == %s && arp.tpa == %s && arp.op == 1",
4662 op
->json_key
, ip_address
);
4664 ds_put_format(&match
,
4665 "inport == %s && nd_ns && nd.target == %s",
4666 op
->json_key
, ip_address
);
4670 if (addr_family
== AF_INET
) {
4671 ds_put_format(&actions
,
4672 "eth.dst = eth.src; "
4674 "arp.op = 2; /* ARP reply */ "
4675 "arp.tha = arp.sha; "
4677 "arp.tpa = arp.spa; "
4680 "flags.loopback = 1; "
4682 op
->lrp_networks
.ea_s
,
4683 op
->lrp_networks
.ea_s
,
4687 ds_put_format(&actions
,
4693 "outport = inport; "
4694 "flags.loopback = 1; "
4697 op
->lrp_networks
.ea_s
,
4700 op
->lrp_networks
.ea_s
);
4702 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4703 ds_cstr(&match
), ds_cstr(&actions
));
4706 sset_destroy(&all_ips
);
4708 /* A gateway router can have 2 SNAT IP addresses to force DNATed and
4709 * LBed traffic respectively to be SNATed. In addition, there can be
4710 * a number of SNAT rules in the NAT table. */
4711 ovs_be32
*snat_ips
= xmalloc(sizeof *snat_ips
*
4712 (op
->od
->nbr
->n_nat
+ 2));
4713 size_t n_snat_ips
= 0;
4716 const char *dnat_force_snat_ip
= get_force_snat_ip(op
->od
, "dnat",
4718 if (dnat_force_snat_ip
) {
4719 snat_ips
[n_snat_ips
++] = snat_ip
;
4722 const char *lb_force_snat_ip
= get_force_snat_ip(op
->od
, "lb",
4724 if (lb_force_snat_ip
) {
4725 snat_ips
[n_snat_ips
++] = snat_ip
;
4728 for (int i
= 0; i
< op
->od
->nbr
->n_nat
; i
++) {
4729 const struct nbrec_nat
*nat
;
4731 nat
= op
->od
->nbr
->nat
[i
];
4734 if (!ip_parse(nat
->external_ip
, &ip
) || !ip
) {
4735 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4736 VLOG_WARN_RL(&rl
, "bad ip address %s in nat configuration "
4737 "for router %s", nat
->external_ip
, op
->key
);
4741 if (!strcmp(nat
->type
, "snat")) {
4742 snat_ips
[n_snat_ips
++] = ip
;
4746 /* ARP handling for external IP addresses.
4748 * DNAT IP addresses are external IP addresses that need ARP
4751 ds_put_format(&match
,
4752 "inport == %s && arp.tpa == "IP_FMT
" && arp.op == 1",
4753 op
->json_key
, IP_ARGS(ip
));
4756 ds_put_format(&actions
,
4757 "eth.dst = eth.src; "
4758 "arp.op = 2; /* ARP reply */ "
4759 "arp.tha = arp.sha; ");
4761 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
) {
4762 struct eth_addr mac
;
4763 if (nat
->external_mac
&&
4764 eth_addr_from_string(nat
->external_mac
, &mac
)
4765 && nat
->logical_port
) {
4766 /* distributed NAT case, use nat->external_mac */
4767 ds_put_format(&actions
,
4768 "eth.src = "ETH_ADDR_FMT
"; "
4769 "arp.sha = "ETH_ADDR_FMT
"; ",
4771 ETH_ADDR_ARGS(mac
));
4772 /* Traffic with eth.src = nat->external_mac should only be
4773 * sent from the chassis where nat->logical_port is
4774 * resident, so that upstream MAC learning points to the
4775 * correct chassis. Also need to avoid generation of
4776 * multiple ARP responses from different chassis. */
4777 ds_put_format(&match
, " && is_chassis_resident(\"%s\")",
4780 ds_put_format(&actions
,
4783 op
->lrp_networks
.ea_s
,
4784 op
->lrp_networks
.ea_s
);
4785 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4786 * should only be sent from the "redirect-chassis", so that
4787 * upstream MAC learning points to the "redirect-chassis".
4788 * Also need to avoid generation of multiple ARP responses
4789 * from different chassis. */
4790 if (op
->od
->l3redirect_port
) {
4791 ds_put_format(&match
, " && is_chassis_resident(%s)",
4792 op
->od
->l3redirect_port
->json_key
);
4796 ds_put_format(&actions
,
4799 op
->lrp_networks
.ea_s
,
4800 op
->lrp_networks
.ea_s
);
4802 ds_put_format(&actions
,
4803 "arp.tpa = arp.spa; "
4804 "arp.spa = "IP_FMT
"; "
4806 "flags.loopback = 1; "
4810 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4811 ds_cstr(&match
), ds_cstr(&actions
));
4815 ds_put_cstr(&match
, "ip4.dst == {");
4816 bool has_drop_ips
= false;
4817 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
4818 bool snat_ip_is_router_ip
= false;
4819 for (int j
= 0; j
< n_snat_ips
; j
++) {
4820 /* Packets to SNAT IPs should not be dropped. */
4821 if (op
->lrp_networks
.ipv4_addrs
[i
].addr
== snat_ips
[j
]) {
4822 snat_ip_is_router_ip
= true;
4826 if (snat_ip_is_router_ip
) {
4829 ds_put_format(&match
, "%s, ",
4830 op
->lrp_networks
.ipv4_addrs
[i
].addr_s
);
4831 has_drop_ips
= true;
4833 ds_chomp(&match
, ' ');
4834 ds_chomp(&match
, ',');
4835 ds_put_cstr(&match
, "}");
4838 /* Drop IP traffic to this router. */
4839 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 60,
4840 ds_cstr(&match
), "drop;");
4846 /* Logical router ingress table 1: IP Input for IPv6. */
4847 HMAP_FOR_EACH (op
, key_node
, ports
) {
4853 /* No ingress packets are accepted on a chassisredirect
4854 * port, so no need to program flows for that port. */
4858 if (op
->lrp_networks
.n_ipv6_addrs
) {
4859 /* L3 admission control: drop packets that originate from an
4860 * IPv6 address owned by the router (priority 100). */
4862 ds_put_cstr(&match
, "ip6.src == ");
4863 op_put_v6_networks(&match
, op
);
4864 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 100,
4865 ds_cstr(&match
), "drop;");
4867 /* ICMPv6 echo reply. These flows reply to echo requests
4868 * received for the router's IP address. */
4870 ds_put_cstr(&match
, "ip6.dst == ");
4871 op_put_v6_networks(&match
, op
);
4872 ds_put_cstr(&match
, " && icmp6.type == 128 && icmp6.code == 0");
4875 ds_put_cstr(&actions
,
4876 "ip6.dst <-> ip6.src; "
4878 "icmp6.type = 129; "
4879 "flags.loopback = 1; "
4881 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4882 ds_cstr(&match
), ds_cstr(&actions
));
4884 /* Drop IPv6 traffic to this router. */
4886 ds_put_cstr(&match
, "ip6.dst == ");
4887 op_put_v6_networks(&match
, op
);
4888 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 60,
4889 ds_cstr(&match
), "drop;");
4892 /* ND reply. These flows reply to ND solicitations for the
4893 * router's own IP address. */
4894 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
4896 ds_put_format(&match
,
4897 "inport == %s && nd_ns && ip6.dst == {%s, %s} "
4898 "&& nd.target == %s",
4900 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
4901 op
->lrp_networks
.ipv6_addrs
[i
].sn_addr_s
,
4902 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
);
4903 if (op
->od
->l3dgw_port
&& op
== op
->od
->l3dgw_port
4904 && op
->od
->l3redirect_port
) {
4905 /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
4906 * should only be sent from the "redirect-chassis", so that
4907 * upstream MAC learning points to the "redirect-chassis".
4908 * Also need to avoid generation of multiple ND replies
4909 * from different chassis. */
4910 ds_put_format(&match
, " && is_chassis_resident(%s)",
4911 op
->od
->l3redirect_port
->json_key
);
4915 ds_put_format(&actions
,
4916 "put_nd(inport, ip6.src, nd.sll); "
4922 "outport = inport; "
4923 "flags.loopback = 1; "
4926 op
->lrp_networks
.ea_s
,
4927 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
4928 op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
4929 op
->lrp_networks
.ea_s
);
4930 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_IP_INPUT
, 90,
4931 ds_cstr(&match
), ds_cstr(&actions
));
4935 /* NAT, Defrag and load balancing. */
4936 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
4941 /* Packets are allowed by default. */
4942 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DEFRAG
, 0, "1", "next;");
4943 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 0, "1", "next;");
4944 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
, 0, "1", "next;");
4945 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 0, "1", "next;");
4946 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 0, "1", "next;");
4947 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_EGR_LOOP
, 0, "1", "next;");
4949 /* NAT rules are only valid on Gateway routers and routers with
4950 * l3dgw_port (router has a port with "redirect-chassis"
4952 if (!smap_get(&od
->nbr
->options
, "chassis") && !od
->l3dgw_port
) {
4957 const char *dnat_force_snat_ip
= get_force_snat_ip(od
, "dnat",
4959 const char *lb_force_snat_ip
= get_force_snat_ip(od
, "lb",
4962 for (int i
= 0; i
< od
->nbr
->n_nat
; i
++) {
4963 const struct nbrec_nat
*nat
;
4965 nat
= od
->nbr
->nat
[i
];
4969 char *error
= ip_parse_masked(nat
->external_ip
, &ip
, &mask
);
4970 if (error
|| mask
!= OVS_BE32_MAX
) {
4971 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 1);
4972 VLOG_WARN_RL(&rl
, "bad external ip %s for nat",
4978 /* Check the validity of nat->logical_ip. 'logical_ip' can
4979 * be a subnet when the type is "snat". */
4980 error
= ip_parse_masked(nat
->logical_ip
, &ip
, &mask
);
4981 if (!strcmp(nat
->type
, "snat")) {
4983 static struct vlog_rate_limit rl
=
4984 VLOG_RATE_LIMIT_INIT(5, 1);
4985 VLOG_WARN_RL(&rl
, "bad ip network or ip %s for snat "
4986 "in router "UUID_FMT
"",
4987 nat
->logical_ip
, UUID_ARGS(&od
->key
));
4992 if (error
|| mask
!= OVS_BE32_MAX
) {
4993 static struct vlog_rate_limit rl
=
4994 VLOG_RATE_LIMIT_INIT(5, 1);
4995 VLOG_WARN_RL(&rl
, "bad ip %s for dnat in router "
4996 ""UUID_FMT
"", nat
->logical_ip
, UUID_ARGS(&od
->key
));
5002 /* For distributed router NAT, determine whether this NAT rule
5003 * satisfies the conditions for distributed NAT processing. */
5004 bool distributed
= false;
5005 struct eth_addr mac
;
5006 if (od
->l3dgw_port
&& !strcmp(nat
->type
, "dnat_and_snat") &&
5007 nat
->logical_port
&& nat
->external_mac
) {
5008 if (eth_addr_from_string(nat
->external_mac
, &mac
)) {
5011 static struct vlog_rate_limit rl
=
5012 VLOG_RATE_LIMIT_INIT(5, 1);
5013 VLOG_WARN_RL(&rl
, "bad mac %s for dnat in router "
5014 ""UUID_FMT
"", nat
->external_mac
, UUID_ARGS(&od
->key
));
5019 /* Ingress UNSNAT table: It is for already established connections'
5020 * reverse traffic. i.e., SNAT has already been done in egress
5021 * pipeline and now the packet has entered the ingress pipeline as
5022 * part of a reply. We undo the SNAT here.
5024 * Undoing SNAT has to happen before DNAT processing. This is
5025 * because when the packet was DNATed in ingress pipeline, it did
5026 * not know about the possibility of eventual additional SNAT in
5027 * egress pipeline. */
5028 if (!strcmp(nat
->type
, "snat")
5029 || !strcmp(nat
->type
, "dnat_and_snat")) {
5030 if (!od
->l3dgw_port
) {
5031 /* Gateway router. */
5033 ds_put_format(&match
, "ip && ip4.dst == %s",
5035 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 90,
5036 ds_cstr(&match
), "ct_snat; next;");
5038 /* Distributed router. */
5040 /* Traffic received on l3dgw_port is subject to NAT. */
5042 ds_put_format(&match
, "ip && ip4.dst == %s"
5045 od
->l3dgw_port
->json_key
);
5046 if (!distributed
&& od
->l3redirect_port
) {
5047 /* Flows for NAT rules that are centralized are only
5048 * programmed on the "redirect-chassis". */
5049 ds_put_format(&match
, " && is_chassis_resident(%s)",
5050 od
->l3redirect_port
->json_key
);
5052 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 100,
5053 ds_cstr(&match
), "ct_snat;");
5055 /* Traffic received on other router ports must be
5056 * redirected to the central instance of the l3dgw_port
5057 * for NAT processing. */
5059 ds_put_format(&match
, "ip && ip4.dst == %s",
5061 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 50,
5063 REGBIT_NAT_REDIRECT
" = 1; next;");
5067 /* Ingress DNAT table: Packets enter the pipeline with destination
5068 * IP address that needs to be DNATted from a external IP address
5069 * to a logical IP address. */
5070 if (!strcmp(nat
->type
, "dnat")
5071 || !strcmp(nat
->type
, "dnat_and_snat")) {
5072 if (!od
->l3dgw_port
) {
5073 /* Gateway router. */
5074 /* Packet when it goes from the initiator to destination.
5075 * We need to set flags.loopback because the router can
5076 * send the packet back through the same interface. */
5078 ds_put_format(&match
, "ip && ip4.dst == %s",
5081 if (dnat_force_snat_ip
) {
5082 /* Indicate to the future tables that a DNAT has taken
5083 * place and a force SNAT needs to be done in the
5084 * Egress SNAT table. */
5085 ds_put_format(&actions
,
5086 "flags.force_snat_for_dnat = 1; ");
5088 ds_put_format(&actions
, "flags.loopback = 1; ct_dnat(%s);",
5090 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 100,
5091 ds_cstr(&match
), ds_cstr(&actions
));
5093 /* Distributed router. */
5095 /* Traffic received on l3dgw_port is subject to NAT. */
5097 ds_put_format(&match
, "ip && ip4.dst == %s"
5100 od
->l3dgw_port
->json_key
);
5101 if (!distributed
&& od
->l3redirect_port
) {
5102 /* Flows for NAT rules that are centralized are only
5103 * programmed on the "redirect-chassis". */
5104 ds_put_format(&match
, " && is_chassis_resident(%s)",
5105 od
->l3redirect_port
->json_key
);
5108 ds_put_format(&actions
, "ct_dnat(%s);",
5110 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 100,
5111 ds_cstr(&match
), ds_cstr(&actions
));
5113 /* Traffic received on other router ports must be
5114 * redirected to the central instance of the l3dgw_port
5115 * for NAT processing. */
5117 ds_put_format(&match
, "ip && ip4.dst == %s",
5119 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 50,
5121 REGBIT_NAT_REDIRECT
" = 1; next;");
5125 /* Egress UNDNAT table: It is for already established connections'
5126 * reverse traffic. i.e., DNAT has already been done in ingress
5127 * pipeline and now the packet has entered the egress pipeline as
5128 * part of a reply. We undo the DNAT here.
5130 * Note that this only applies for NAT on a distributed router.
5131 * Undo DNAT on a gateway router is done in the ingress DNAT
5132 * pipeline stage. */
5133 if (od
->l3dgw_port
&& (!strcmp(nat
->type
, "dnat")
5134 || !strcmp(nat
->type
, "dnat_and_snat"))) {
5136 ds_put_format(&match
, "ip && ip4.src == %s"
5137 " && outport == %s",
5139 od
->l3dgw_port
->json_key
);
5140 if (!distributed
&& od
->l3redirect_port
) {
5141 /* Flows for NAT rules that are centralized are only
5142 * programmed on the "redirect-chassis". */
5143 ds_put_format(&match
, " && is_chassis_resident(%s)",
5144 od
->l3redirect_port
->json_key
);
5148 ds_put_format(&actions
, "eth.src = "ETH_ADDR_FMT
"; ",
5149 ETH_ADDR_ARGS(mac
));
5151 ds_put_format(&actions
, "ct_dnat;");
5152 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_UNDNAT
, 100,
5153 ds_cstr(&match
), ds_cstr(&actions
));
5156 /* Egress SNAT table: Packets enter the egress pipeline with
5157 * source ip address that needs to be SNATted to a external ip
5159 if (!strcmp(nat
->type
, "snat")
5160 || !strcmp(nat
->type
, "dnat_and_snat")) {
5161 if (!od
->l3dgw_port
) {
5162 /* Gateway router. */
5164 ds_put_format(&match
, "ip && ip4.src == %s",
5167 ds_put_format(&actions
, "ct_snat(%s);", nat
->external_ip
);
5169 /* The priority here is calculated such that the
5170 * nat->logical_ip with the longest mask gets a higher
5172 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
,
5173 count_1bits(ntohl(mask
)) + 1,
5174 ds_cstr(&match
), ds_cstr(&actions
));
5176 /* Distributed router. */
5178 ds_put_format(&match
, "ip && ip4.src == %s"
5179 " && outport == %s",
5181 od
->l3dgw_port
->json_key
);
5182 if (!distributed
&& od
->l3redirect_port
) {
5183 /* Flows for NAT rules that are centralized are only
5184 * programmed on the "redirect-chassis". */
5185 ds_put_format(&match
, " && is_chassis_resident(%s)",
5186 od
->l3redirect_port
->json_key
);
5190 ds_put_format(&actions
, "eth.src = "ETH_ADDR_FMT
"; ",
5191 ETH_ADDR_ARGS(mac
));
5193 ds_put_format(&actions
, "ct_snat(%s);", nat
->external_ip
);
5195 /* The priority here is calculated such that the
5196 * nat->logical_ip with the longest mask gets a higher
5198 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
,
5199 count_1bits(ntohl(mask
)) + 1,
5200 ds_cstr(&match
), ds_cstr(&actions
));
5204 /* Logical router ingress table 0:
5205 * For NAT on a distributed router, add rules allowing
5206 * ingress traffic with eth.dst matching nat->external_mac
5207 * on the l3dgw_port instance where nat->logical_port is
5211 ds_put_format(&match
,
5212 "eth.dst == "ETH_ADDR_FMT
" && inport == %s"
5213 " && is_chassis_resident(\"%s\")",
5215 od
->l3dgw_port
->json_key
,
5217 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ADMISSION
, 50,
5218 ds_cstr(&match
), "next;");
5221 /* Ingress Gateway Redirect Table: For NAT on a distributed
5222 * router, add flows that are specific to a NAT rule. These
5223 * flows indicate the presence of an applicable NAT rule that
5224 * can be applied in a distributed manner. */
5227 ds_put_format(&match
, "ip4.src == %s && outport == %s",
5229 od
->l3dgw_port
->json_key
);
5230 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 100,
5231 ds_cstr(&match
), "next;");
5234 /* Egress Loopback table: For NAT on a distributed router.
5235 * If packets in the egress pipeline on the distributed
5236 * gateway port have ip.dst matching a NAT external IP, then
5237 * loop a clone of the packet back to the beginning of the
5238 * ingress pipeline with inport = outport. */
5239 if (od
->l3dgw_port
) {
5240 /* Distributed router. */
5242 ds_put_format(&match
, "ip4.dst == %s && outport == %s",
5244 od
->l3dgw_port
->json_key
);
5246 ds_put_format(&actions
,
5247 "clone { ct_clear; "
5248 "inport = outport; outport = \"\"; "
5249 "flags = 0; flags.loopback = 1; ");
5250 for (int j
= 0; j
< MFF_N_LOG_REGS
; j
++) {
5251 ds_put_format(&actions
, "reg%d = 0; ", j
);
5253 ds_put_format(&actions
, REGBIT_EGRESS_LOOPBACK
" = 1; "
5254 "next(pipeline=ingress, table=0); };");
5255 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_EGR_LOOP
, 100,
5256 ds_cstr(&match
), ds_cstr(&actions
));
5260 /* Handle force SNAT options set in the gateway router. */
5261 if (dnat_force_snat_ip
&& !od
->l3dgw_port
) {
5262 /* If a packet with destination IP address as that of the
5263 * gateway router (as set in options:dnat_force_snat_ip) is seen,
5266 ds_put_format(&match
, "ip && ip4.dst == %s", dnat_force_snat_ip
);
5267 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 110,
5268 ds_cstr(&match
), "ct_snat; next;");
5270 /* Higher priority rules to force SNAT with the IP addresses
5271 * configured in the Gateway router. This only takes effect
5272 * when the packet has already been DNATed once. */
5274 ds_put_format(&match
, "flags.force_snat_for_dnat == 1 && ip");
5276 ds_put_format(&actions
, "ct_snat(%s);", dnat_force_snat_ip
);
5277 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
, 100,
5278 ds_cstr(&match
), ds_cstr(&actions
));
5280 if (lb_force_snat_ip
&& !od
->l3dgw_port
) {
5281 /* If a packet with destination IP address as that of the
5282 * gateway router (as set in options:lb_force_snat_ip) is seen,
5285 ds_put_format(&match
, "ip && ip4.dst == %s", lb_force_snat_ip
);
5286 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_UNSNAT
, 100,
5287 ds_cstr(&match
), "ct_snat; next;");
5289 /* Load balanced traffic will have flags.force_snat_for_lb set.
5292 ds_put_format(&match
, "flags.force_snat_for_lb == 1 && ip");
5294 ds_put_format(&actions
, "ct_snat(%s);", lb_force_snat_ip
);
5295 ovn_lflow_add(lflows
, od
, S_ROUTER_OUT_SNAT
, 100,
5296 ds_cstr(&match
), ds_cstr(&actions
));
5299 if (!od
->l3dgw_port
) {
5300 /* For gateway router, re-circulate every packet through
5301 * the DNAT zone. This helps with two things.
5303 * 1. Any packet that needs to be unDNATed in the reverse
5304 * direction gets unDNATed. Ideally this could be done in
5305 * the egress pipeline. But since the gateway router
5306 * does not have any feature that depends on the source
5307 * ip address being external IP address for IP routing,
5308 * we can do it here, saving a future re-circulation.
5310 * 2. Any packet that was sent through SNAT zone in the
5311 * previous table automatically gets re-circulated to get
5312 * back the new destination IP address that is needed for
5313 * routing in the openflow pipeline. */
5314 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DNAT
, 50,
5315 "ip", "flags.loopback = 1; ct_dnat;");
5317 /* For NAT on a distributed router, add flows to Ingress
5318 * IP Routing table, Ingress ARP Resolution table, and
5319 * Ingress Gateway Redirect Table that are not specific to a
5322 /* The highest priority IN_IP_ROUTING rule matches packets
5323 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5324 * with action "ip.ttl--; next;". The IN_GW_REDIRECT table
5325 * will take care of setting the outport. */
5326 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_IP_ROUTING
, 300,
5327 REGBIT_NAT_REDIRECT
" == 1", "ip.ttl--; next;");
5329 /* The highest priority IN_ARP_RESOLVE rule matches packets
5330 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages),
5331 * then sets eth.dst to the distributed gateway port's
5332 * ethernet address. */
5334 ds_put_format(&actions
, "eth.dst = %s; next;",
5335 od
->l3dgw_port
->lrp_networks
.ea_s
);
5336 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 200,
5337 REGBIT_NAT_REDIRECT
" == 1", ds_cstr(&actions
));
5339 /* The highest priority IN_GW_REDIRECT rule redirects packets
5340 * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to
5341 * the central instance of the l3dgw_port for NAT processing. */
5343 ds_put_format(&actions
, "outport = %s; next;",
5344 od
->l3redirect_port
->json_key
);
5345 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 200,
5346 REGBIT_NAT_REDIRECT
" == 1", ds_cstr(&actions
));
5349 /* Load balancing and packet defrag are only valid on
5350 * Gateway routers or router with gateway port. */
5351 if (!smap_get(&od
->nbr
->options
, "chassis") && !od
->l3dgw_port
) {
5355 /* A set to hold all ips that need defragmentation and tracking. */
5356 struct sset all_ips
= SSET_INITIALIZER(&all_ips
);
5358 for (int i
= 0; i
< od
->nbr
->n_load_balancer
; i
++) {
5359 struct nbrec_load_balancer
*lb
= od
->nbr
->load_balancer
[i
];
5360 struct smap
*vips
= &lb
->vips
;
5361 struct smap_node
*node
;
5363 SMAP_FOR_EACH (node
, vips
) {
5367 /* node->key contains IP:port or just IP. */
5368 char *ip_address
= NULL
;
5369 ip_address_and_port_from_lb_key(node
->key
, &ip_address
, &port
,
5375 if (!sset_contains(&all_ips
, ip_address
)) {
5376 sset_add(&all_ips
, ip_address
);
5377 /* If there are any load balancing rules, we should send
5378 * the packet to conntrack for defragmentation and
5379 * tracking. This helps with two things.
5381 * 1. With tracking, we can send only new connections to
5382 * pick a DNAT ip address from a group.
5383 * 2. If there are L4 ports in load balancing rules, we
5384 * need the defragmentation to match on L4 ports. */
5386 if (addr_family
== AF_INET
) {
5387 ds_put_format(&match
, "ip && ip4.dst == %s",
5390 ds_put_format(&match
, "ip && ip6.dst == %s",
5393 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_DEFRAG
,
5394 100, ds_cstr(&match
), "ct_next;");
5397 /* Higher priority rules are added for load-balancing in DNAT
5398 * table. For every match (on a VIP[:port]), we add two flows
5399 * via add_router_lb_flow(). One flow is for specific matching
5400 * on ct.new with an action of "ct_lb($targets);". The other
5401 * flow is for ct.est with an action of "ct_dnat;". */
5403 ds_put_format(&actions
, "ct_lb(%s);", node
->value
);
5406 if (addr_family
== AF_INET
) {
5407 ds_put_format(&match
, "ip && ip4.dst == %s",
5410 ds_put_format(&match
, "ip && ip6.dst == %s",
5416 bool is_udp
= lb
->protocol
&& !strcmp(lb
->protocol
, "udp") ?
5420 ds_put_format(&match
, " && udp && udp.dst == %d",
5423 ds_put_format(&match
, " && tcp && tcp.dst == %d",
5429 if (od
->l3redirect_port
) {
5430 ds_put_format(&match
, " && is_chassis_resident(%s)",
5431 od
->l3redirect_port
->json_key
);
5433 add_router_lb_flow(lflows
, od
, &match
, &actions
, prio
,
5434 lb_force_snat_ip
, node
->value
, is_udp
,
5438 sset_destroy(&all_ips
);
5441 /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and
5443 HMAP_FOR_EACH (op
, key_node
, ports
) {
5444 if (!op
->nbrp
|| op
->nbrp
->peer
|| !op
->peer
) {
5448 if (!op
->lrp_networks
.n_ipv6_addrs
) {
5452 const char *address_mode
= smap_get(
5453 &op
->nbrp
->ipv6_ra_configs
, "address_mode");
5455 if (!address_mode
) {
5458 if (strcmp(address_mode
, "slaac") &&
5459 strcmp(address_mode
, "dhcpv6_stateful") &&
5460 strcmp(address_mode
, "dhcpv6_stateless")) {
5461 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
5462 VLOG_WARN_RL(&rl
, "Invalid address mode [%s] defined",
5468 ds_put_format(&match
, "inport == %s && ip6.dst == ff02::2 && nd_rs",
5472 const char *mtu_s
= smap_get(
5473 &op
->nbrp
->ipv6_ra_configs
, "mtu");
5475 /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
5476 uint32_t mtu
= (mtu_s
&& atoi(mtu_s
) >= 1280) ? atoi(mtu_s
) : 0;
5478 ds_put_format(&actions
, REGBIT_ND_RA_OPTS_RESULT
" = put_nd_ra_opts("
5479 "addr_mode = \"%s\", slla = %s",
5480 address_mode
, op
->lrp_networks
.ea_s
);
5482 ds_put_format(&actions
, ", mtu = %u", mtu
);
5485 bool add_rs_response_flow
= false;
5487 for (size_t i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
5488 if (in6_is_lla(&op
->lrp_networks
.ipv6_addrs
[i
].network
)) {
5492 /* Add the prefix option if the address mode is slaac or
5493 * dhcpv6_stateless. */
5494 if (strcmp(address_mode
, "dhcpv6_stateful")) {
5495 ds_put_format(&actions
, ", prefix = %s/%u",
5496 op
->lrp_networks
.ipv6_addrs
[i
].network_s
,
5497 op
->lrp_networks
.ipv6_addrs
[i
].plen
);
5499 add_rs_response_flow
= true;
5502 if (add_rs_response_flow
) {
5503 ds_put_cstr(&actions
, "); next;");
5504 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ND_RA_OPTIONS
, 50,
5505 ds_cstr(&match
), ds_cstr(&actions
));
5508 ds_put_format(&match
, "inport == %s && ip6.dst == ff02::2 && "
5509 "nd_ra && "REGBIT_ND_RA_OPTS_RESULT
, op
->json_key
);
5511 char ip6_str
[INET6_ADDRSTRLEN
+ 1];
5512 struct in6_addr lla
;
5513 in6_generate_lla(op
->lrp_networks
.ea
, &lla
);
5514 memset(ip6_str
, 0, sizeof(ip6_str
));
5515 ipv6_string_mapped(ip6_str
, &lla
);
5516 ds_put_format(&actions
, "eth.dst = eth.src; eth.src = %s; "
5517 "ip6.dst = ip6.src; ip6.src = %s; "
5518 "outport = inport; flags.loopback = 1; "
5520 op
->lrp_networks
.ea_s
, ip6_str
);
5521 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_IN_ND_RA_RESPONSE
, 50,
5522 ds_cstr(&match
), ds_cstr(&actions
));
5526 /* Logical router ingress table 5, 6: RS responder, by default goto next.
5528 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5533 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ND_RA_OPTIONS
, 0, "1", "next;");
5534 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ND_RA_RESPONSE
, 0, "1", "next;");
5537 /* Logical router ingress table 7: IP Routing.
5539 * A packet that arrives at this table is an IP packet that should be
5540 * routed to the address in 'ip[46].dst'. This table sets outport to
5541 * the correct output port, eth.src to the output port's MAC
5542 * address, and '[xx]reg0' to the next-hop IP address (leaving
5543 * 'ip[46].dst', the packet’s final destination, unchanged), and
5544 * advances to the next table for ARP/ND resolution. */
5545 HMAP_FOR_EACH (op
, key_node
, ports
) {
5550 for (int i
= 0; i
< op
->lrp_networks
.n_ipv4_addrs
; i
++) {
5551 add_route(lflows
, op
, op
->lrp_networks
.ipv4_addrs
[i
].addr_s
,
5552 op
->lrp_networks
.ipv4_addrs
[i
].network_s
,
5553 op
->lrp_networks
.ipv4_addrs
[i
].plen
, NULL
, NULL
);
5556 for (int i
= 0; i
< op
->lrp_networks
.n_ipv6_addrs
; i
++) {
5557 add_route(lflows
, op
, op
->lrp_networks
.ipv6_addrs
[i
].addr_s
,
5558 op
->lrp_networks
.ipv6_addrs
[i
].network_s
,
5559 op
->lrp_networks
.ipv6_addrs
[i
].plen
, NULL
, NULL
);
5563 /* Convert the static routes to flows. */
5564 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5569 for (int i
= 0; i
< od
->nbr
->n_static_routes
; i
++) {
5570 const struct nbrec_logical_router_static_route
*route
;
5572 route
= od
->nbr
->static_routes
[i
];
5573 build_static_route_flow(lflows
, od
, ports
, route
);
5577 /* XXX destination unreachable */
5579 /* Local router ingress table 8: ARP Resolution.
5581 * Any packet that reaches this table is an IP packet whose next-hop IP
5582 * address is in reg0. (ip4.dst is the final destination.) This table
5583 * resolves the IP address in reg0 into an output port in outport and an
5584 * Ethernet address in eth.dst. */
5585 HMAP_FOR_EACH (op
, key_node
, ports
) {
5586 if (op
->nbsp
&& !lsp_is_enabled(op
->nbsp
)) {
5591 /* This is a logical router port. If next-hop IP address in
5592 * '[xx]reg0' matches IP address of this router port, then
5593 * the packet is intended to eventually be sent to this
5594 * logical port. Set the destination mac address using this
5595 * port's mac address.
5597 * The packet is still in peer's logical pipeline. So the match
5598 * should be on peer's outport. */
5599 if (op
->peer
&& op
->nbrp
->peer
) {
5600 if (op
->lrp_networks
.n_ipv4_addrs
) {
5602 ds_put_format(&match
, "outport == %s && reg0 == ",
5603 op
->peer
->json_key
);
5604 op_put_v4_networks(&match
, op
, false);
5607 ds_put_format(&actions
, "eth.dst = %s; next;",
5608 op
->lrp_networks
.ea_s
);
5609 ovn_lflow_add(lflows
, op
->peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
5610 100, ds_cstr(&match
), ds_cstr(&actions
));
5613 if (op
->lrp_networks
.n_ipv6_addrs
) {
5615 ds_put_format(&match
, "outport == %s && xxreg0 == ",
5616 op
->peer
->json_key
);
5617 op_put_v6_networks(&match
, op
);
5620 ds_put_format(&actions
, "eth.dst = %s; next;",
5621 op
->lrp_networks
.ea_s
);
5622 ovn_lflow_add(lflows
, op
->peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
5623 100, ds_cstr(&match
), ds_cstr(&actions
));
5626 } else if (op
->od
->n_router_ports
&& strcmp(op
->nbsp
->type
, "router")) {
5627 /* This is a logical switch port that backs a VM or a container.
5628 * Extract its addresses. For each of the address, go through all
5629 * the router ports attached to the switch (to which this port
5630 * connects) and if the address in question is reachable from the
5631 * router port, add an ARP/ND entry in that router's pipeline. */
5633 for (size_t i
= 0; i
< op
->n_lsp_addrs
; i
++) {
5634 const char *ea_s
= op
->lsp_addrs
[i
].ea_s
;
5635 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv4_addrs
; j
++) {
5636 const char *ip_s
= op
->lsp_addrs
[i
].ipv4_addrs
[j
].addr_s
;
5637 for (size_t k
= 0; k
< op
->od
->n_router_ports
; k
++) {
5638 /* Get the Logical_Router_Port that the
5639 * Logical_Switch_Port is connected to, as
5641 const char *peer_name
= smap_get(
5642 &op
->od
->router_ports
[k
]->nbsp
->options
,
5648 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
5649 if (!peer
|| !peer
->nbrp
) {
5653 if (!find_lrp_member_ip(peer
, ip_s
)) {
5658 ds_put_format(&match
, "outport == %s && reg0 == %s",
5659 peer
->json_key
, ip_s
);
5662 ds_put_format(&actions
, "eth.dst = %s; next;", ea_s
);
5663 ovn_lflow_add(lflows
, peer
->od
,
5664 S_ROUTER_IN_ARP_RESOLVE
, 100,
5665 ds_cstr(&match
), ds_cstr(&actions
));
5669 for (size_t j
= 0; j
< op
->lsp_addrs
[i
].n_ipv6_addrs
; j
++) {
5670 const char *ip_s
= op
->lsp_addrs
[i
].ipv6_addrs
[j
].addr_s
;
5671 for (size_t k
= 0; k
< op
->od
->n_router_ports
; k
++) {
5672 /* Get the Logical_Router_Port that the
5673 * Logical_Switch_Port is connected to, as
5675 const char *peer_name
= smap_get(
5676 &op
->od
->router_ports
[k
]->nbsp
->options
,
5682 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
5683 if (!peer
|| !peer
->nbrp
) {
5687 if (!find_lrp_member_ip(peer
, ip_s
)) {
5692 ds_put_format(&match
, "outport == %s && xxreg0 == %s",
5693 peer
->json_key
, ip_s
);
5696 ds_put_format(&actions
, "eth.dst = %s; next;", ea_s
);
5697 ovn_lflow_add(lflows
, peer
->od
,
5698 S_ROUTER_IN_ARP_RESOLVE
, 100,
5699 ds_cstr(&match
), ds_cstr(&actions
));
5703 } else if (!strcmp(op
->nbsp
->type
, "router")) {
5704 /* This is a logical switch port that connects to a router. */
5706 /* The peer of this switch port is the router port for which
5707 * we need to add logical flows such that it can resolve
5708 * ARP entries for all the other router ports connected to
5709 * the switch in question. */
5711 const char *peer_name
= smap_get(&op
->nbsp
->options
,
5717 struct ovn_port
*peer
= ovn_port_find(ports
, peer_name
);
5718 if (!peer
|| !peer
->nbrp
) {
5722 for (size_t i
= 0; i
< op
->od
->n_router_ports
; i
++) {
5723 const char *router_port_name
= smap_get(
5724 &op
->od
->router_ports
[i
]->nbsp
->options
,
5726 struct ovn_port
*router_port
= ovn_port_find(ports
,
5728 if (!router_port
|| !router_port
->nbrp
) {
5732 /* Skip the router port under consideration. */
5733 if (router_port
== peer
) {
5737 if (router_port
->lrp_networks
.n_ipv4_addrs
) {
5739 ds_put_format(&match
, "outport == %s && reg0 == ",
5741 op_put_v4_networks(&match
, router_port
, false);
5744 ds_put_format(&actions
, "eth.dst = %s; next;",
5745 router_port
->lrp_networks
.ea_s
);
5746 ovn_lflow_add(lflows
, peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
5747 100, ds_cstr(&match
), ds_cstr(&actions
));
5750 if (router_port
->lrp_networks
.n_ipv6_addrs
) {
5752 ds_put_format(&match
, "outport == %s && xxreg0 == ",
5754 op_put_v6_networks(&match
, router_port
);
5757 ds_put_format(&actions
, "eth.dst = %s; next;",
5758 router_port
->lrp_networks
.ea_s
);
5759 ovn_lflow_add(lflows
, peer
->od
, S_ROUTER_IN_ARP_RESOLVE
,
5760 100, ds_cstr(&match
), ds_cstr(&actions
));
5766 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5771 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 0, "ip4",
5772 "get_arp(outport, reg0); next;");
5774 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_RESOLVE
, 0, "ip6",
5775 "get_nd(outport, xxreg0); next;");
5778 /* Logical router ingress table 9: Gateway redirect.
5780 * For traffic with outport equal to the l3dgw_port
5781 * on a distributed router, this table redirects a subset
5782 * of the traffic to the l3redirect_port which represents
5783 * the central instance of the l3dgw_port.
5785 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5789 if (od
->l3dgw_port
&& od
->l3redirect_port
) {
5790 /* For traffic with outport == l3dgw_port, if the
5791 * packet did not match any higher priority redirect
5792 * rule, then the traffic is redirected to the central
5793 * instance of the l3dgw_port. */
5795 ds_put_format(&match
, "outport == %s",
5796 od
->l3dgw_port
->json_key
);
5798 ds_put_format(&actions
, "outport = %s; next;",
5799 od
->l3redirect_port
->json_key
);
5800 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 50,
5801 ds_cstr(&match
), ds_cstr(&actions
));
5803 /* If the Ethernet destination has not been resolved,
5804 * redirect to the central instance of the l3dgw_port.
5805 * Such traffic will be replaced by an ARP request or ND
5806 * Neighbor Solicitation in the ARP request ingress
5807 * table, before being redirected to the central instance.
5809 ds_put_format(&match
, " && eth.dst == 00:00:00:00:00:00");
5810 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 150,
5811 ds_cstr(&match
), ds_cstr(&actions
));
5814 /* Packets are allowed by default. */
5815 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_GW_REDIRECT
, 0, "1", "next;");
5818 /* Local router ingress table 10: ARP request.
5820 * In the common case where the Ethernet destination has been resolved,
5821 * this table outputs the packet (priority 0). Otherwise, it composes
5822 * and sends an ARP/IPv6 NA request (priority 100). */
5823 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
5828 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 100,
5829 "eth.dst == 00:00:00:00:00:00",
5831 "eth.dst = ff:ff:ff:ff:ff:ff; "
5834 "arp.op = 1; " /* ARP request */
5837 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 100,
5838 "eth.dst == 00:00:00:00:00:00",
5840 "nd.target = xxreg0; "
5843 ovn_lflow_add(lflows
, od
, S_ROUTER_IN_ARP_REQUEST
, 0, "1", "output;");
5846 /* Logical router egress table 1: Delivery (priority 100).
5848 * Priority 100 rules deliver packets to enabled logical ports. */
5849 HMAP_FOR_EACH (op
, key_node
, ports
) {
5854 if (!lrport_is_enabled(op
->nbrp
)) {
5855 /* Drop packets to disabled logical ports (since logical flow
5856 * tables are default-drop). */
5861 /* No egress packets should be processed in the context of
5862 * a chassisredirect port. The chassisredirect port should
5863 * be replaced by the l3dgw port in the local output
5864 * pipeline stage before egress processing. */
5869 ds_put_format(&match
, "outport == %s", op
->json_key
);
5870 ovn_lflow_add(lflows
, op
->od
, S_ROUTER_OUT_DELIVERY
, 100,
5871 ds_cstr(&match
), "output;");
5875 ds_destroy(&actions
);
5878 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
5879 * constructing their contents based on the OVN_NB database. */
5881 build_lflows(struct northd_context
*ctx
, struct hmap
*datapaths
,
5884 struct hmap lflows
= HMAP_INITIALIZER(&lflows
);
5885 struct hmap mcgroups
= HMAP_INITIALIZER(&mcgroups
);
5887 build_lswitch_flows(datapaths
, ports
, &lflows
, &mcgroups
);
5888 build_lrouter_flows(datapaths
, ports
, &lflows
);
5890 /* Push changes to the Logical_Flow table to database. */
5891 const struct sbrec_logical_flow
*sbflow
, *next_sbflow
;
5892 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow
, next_sbflow
, ctx
->ovnsb_idl
) {
5893 struct ovn_datapath
*od
5894 = ovn_datapath_from_sbrec(datapaths
, sbflow
->logical_datapath
);
5896 sbrec_logical_flow_delete(sbflow
);
5900 enum ovn_datapath_type dp_type
= od
->nbs
? DP_SWITCH
: DP_ROUTER
;
5901 enum ovn_pipeline pipeline
5902 = !strcmp(sbflow
->pipeline
, "ingress") ? P_IN
: P_OUT
;
5903 struct ovn_lflow
*lflow
= ovn_lflow_find(
5904 &lflows
, od
, ovn_stage_build(dp_type
, pipeline
, sbflow
->table_id
),
5905 sbflow
->priority
, sbflow
->match
, sbflow
->actions
);
5907 ovn_lflow_destroy(&lflows
, lflow
);
5909 sbrec_logical_flow_delete(sbflow
);
5912 struct ovn_lflow
*lflow
, *next_lflow
;
5913 HMAP_FOR_EACH_SAFE (lflow
, next_lflow
, hmap_node
, &lflows
) {
5914 enum ovn_pipeline pipeline
= ovn_stage_get_pipeline(lflow
->stage
);
5915 uint8_t table
= ovn_stage_get_table(lflow
->stage
);
5917 sbflow
= sbrec_logical_flow_insert(ctx
->ovnsb_txn
);
5918 sbrec_logical_flow_set_logical_datapath(sbflow
, lflow
->od
->sb
);
5919 sbrec_logical_flow_set_pipeline(
5920 sbflow
, pipeline
== P_IN
? "ingress" : "egress");
5921 sbrec_logical_flow_set_table_id(sbflow
, table
);
5922 sbrec_logical_flow_set_priority(sbflow
, lflow
->priority
);
5923 sbrec_logical_flow_set_match(sbflow
, lflow
->match
);
5924 sbrec_logical_flow_set_actions(sbflow
, lflow
->actions
);
5926 /* Trim the source locator lflow->where, which looks something like
5927 * "ovn/northd/ovn-northd.c:1234", down to just the part following the
5928 * last slash, e.g. "ovn-northd.c:1234". */
5929 const char *slash
= strrchr(lflow
->where
, '/');
5931 const char *backslash
= strrchr(lflow
->where
, '\\');
5932 if (!slash
|| backslash
> slash
) {
5936 const char *where
= slash
? slash
+ 1 : lflow
->where
;
5938 struct smap ids
= SMAP_INITIALIZER(&ids
);
5939 smap_add(&ids
, "stage-name", ovn_stage_to_str(lflow
->stage
));
5940 smap_add(&ids
, "source", where
);
5941 if (lflow
->stage_hint
) {
5942 smap_add(&ids
, "stage-hint", lflow
->stage_hint
);
5944 sbrec_logical_flow_set_external_ids(sbflow
, &ids
);
5947 ovn_lflow_destroy(&lflows
, lflow
);
5949 hmap_destroy(&lflows
);
5951 /* Push changes to the Multicast_Group table to database. */
5952 const struct sbrec_multicast_group
*sbmc
, *next_sbmc
;
5953 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc
, next_sbmc
, ctx
->ovnsb_idl
) {
5954 struct ovn_datapath
*od
= ovn_datapath_from_sbrec(datapaths
,
5957 sbrec_multicast_group_delete(sbmc
);
5961 struct multicast_group group
= { .name
= sbmc
->name
,
5962 .key
= sbmc
->tunnel_key
};
5963 struct ovn_multicast
*mc
= ovn_multicast_find(&mcgroups
, od
, &group
);
5965 ovn_multicast_update_sbrec(mc
, sbmc
);
5966 ovn_multicast_destroy(&mcgroups
, mc
);
5968 sbrec_multicast_group_delete(sbmc
);
5971 struct ovn_multicast
*mc
, *next_mc
;
5972 HMAP_FOR_EACH_SAFE (mc
, next_mc
, hmap_node
, &mcgroups
) {
5973 sbmc
= sbrec_multicast_group_insert(ctx
->ovnsb_txn
);
5974 sbrec_multicast_group_set_datapath(sbmc
, mc
->datapath
->sb
);
5975 sbrec_multicast_group_set_name(sbmc
, mc
->group
->name
);
5976 sbrec_multicast_group_set_tunnel_key(sbmc
, mc
->group
->key
);
5977 ovn_multicast_update_sbrec(mc
, sbmc
);
5978 ovn_multicast_destroy(&mcgroups
, mc
);
5980 hmap_destroy(&mcgroups
);
5983 /* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
5984 * We always update OVN_Southbound to match the current data in
5985 * OVN_Northbound, so that the address sets used in Logical_Flows in
5986 * OVN_Southbound is checked against the proper set.*/
5988 sync_address_sets(struct northd_context
*ctx
)
5990 struct shash sb_address_sets
= SHASH_INITIALIZER(&sb_address_sets
);
5992 const struct sbrec_address_set
*sb_address_set
;
5993 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set
, ctx
->ovnsb_idl
) {
5994 shash_add(&sb_address_sets
, sb_address_set
->name
, sb_address_set
);
5997 const struct nbrec_address_set
*nb_address_set
;
5998 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set
, ctx
->ovnnb_idl
) {
5999 sb_address_set
= shash_find_and_delete(&sb_address_sets
,
6000 nb_address_set
->name
);
6001 if (!sb_address_set
) {
6002 sb_address_set
= sbrec_address_set_insert(ctx
->ovnsb_txn
);
6003 sbrec_address_set_set_name(sb_address_set
, nb_address_set
->name
);
6006 sbrec_address_set_set_addresses(sb_address_set
,
6007 /* "char **" is not compatible with "const char **" */
6008 (const char **) nb_address_set
->addresses
,
6009 nb_address_set
->n_addresses
);
6012 struct shash_node
*node
, *next
;
6013 SHASH_FOR_EACH_SAFE (node
, next
, &sb_address_sets
) {
6014 sbrec_address_set_delete(node
->data
);
6015 shash_delete(&sb_address_sets
, node
);
6017 shash_destroy(&sb_address_sets
);
6021 * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
6022 * and Southbound db.
6025 struct hmap_node hmap_node
;
6026 const struct nbrec_dns
*nb_dns
; /* DNS record in the Northbound db. */
6027 const struct sbrec_dns
*sb_dns
; /* DNS record in the Soutbound db. */
6029 /* Datapaths to which the DNS entry is associated with it. */
6030 const struct sbrec_datapath_binding
**sbs
;
6034 static inline struct dns_info
*
6035 get_dns_info_from_hmap(struct hmap
*dns_map
, struct uuid
*uuid
)
6037 struct dns_info
*dns_info
;
6038 size_t hash
= uuid_hash(uuid
);
6039 HMAP_FOR_EACH_WITH_HASH (dns_info
, hmap_node
, hash
, dns_map
) {
6040 if (uuid_equals(&dns_info
->nb_dns
->header_
.uuid
, uuid
)) {
6049 sync_dns_entries(struct northd_context
*ctx
, struct hmap
*datapaths
)
6051 struct hmap dns_map
= HMAP_INITIALIZER(&dns_map
);
6052 struct ovn_datapath
*od
;
6053 HMAP_FOR_EACH (od
, key_node
, datapaths
) {
6054 if (!od
->nbs
|| !od
->nbs
->n_dns_records
) {
6058 for (size_t i
= 0; i
< od
->nbs
->n_dns_records
; i
++) {
6059 struct dns_info
*dns_info
= get_dns_info_from_hmap(
6060 &dns_map
, &od
->nbs
->dns_records
[i
]->header_
.uuid
);
6062 size_t hash
= uuid_hash(
6063 &od
->nbs
->dns_records
[i
]->header_
.uuid
);
6064 dns_info
= xzalloc(sizeof *dns_info
);;
6065 dns_info
->nb_dns
= od
->nbs
->dns_records
[i
];
6066 hmap_insert(&dns_map
, &dns_info
->hmap_node
, hash
);
6070 dns_info
->sbs
= xrealloc(dns_info
->sbs
,
6071 dns_info
->n_sbs
* sizeof *dns_info
->sbs
);
6072 dns_info
->sbs
[dns_info
->n_sbs
- 1] = od
->sb
;
6076 const struct sbrec_dns
*sbrec_dns
, *next
;
6077 SBREC_DNS_FOR_EACH_SAFE (sbrec_dns
, next
, ctx
->ovnsb_idl
) {
6078 const char *nb_dns_uuid
= smap_get(&sbrec_dns
->external_ids
, "dns_id");
6079 struct uuid dns_uuid
;
6080 if (!nb_dns_uuid
|| !uuid_from_string(&dns_uuid
, nb_dns_uuid
)) {
6081 sbrec_dns_delete(sbrec_dns
);
6085 struct dns_info
*dns_info
=
6086 get_dns_info_from_hmap(&dns_map
, &dns_uuid
);
6088 dns_info
->sb_dns
= sbrec_dns
;
6090 sbrec_dns_delete(sbrec_dns
);
6094 struct dns_info
*dns_info
;
6095 HMAP_FOR_EACH_POP (dns_info
, hmap_node
, &dns_map
) {
6096 if (!dns_info
->sb_dns
) {
6097 sbrec_dns
= sbrec_dns_insert(ctx
->ovnsb_txn
);
6098 dns_info
->sb_dns
= sbrec_dns
;
6099 char *dns_id
= xasprintf(
6100 UUID_FMT
, UUID_ARGS(&dns_info
->nb_dns
->header_
.uuid
));
6101 const struct smap external_ids
=
6102 SMAP_CONST1(&external_ids
, "dns_id", dns_id
);
6103 sbrec_dns_set_external_ids(sbrec_dns
, &external_ids
);
6107 /* Set the datapaths and records. If nothing has changed, then
6108 * this will be a no-op.
6110 sbrec_dns_set_datapaths(
6112 (struct sbrec_datapath_binding
**)dns_info
->sbs
,
6114 sbrec_dns_set_records(dns_info
->sb_dns
, &dns_info
->nb_dns
->records
);
6115 free(dns_info
->sbs
);
6118 hmap_destroy(&dns_map
);
6123 ovnnb_db_run(struct northd_context
*ctx
, struct chassis_index
*chassis_index
,
6124 struct ovsdb_idl_loop
*sb_loop
)
6126 if (!ctx
->ovnsb_txn
|| !ctx
->ovnnb_txn
) {
6129 struct hmap datapaths
, ports
;
6130 build_datapaths(ctx
, &datapaths
);
6131 build_ports(ctx
, &datapaths
, chassis_index
, &ports
);
6132 build_ipam(&datapaths
, &ports
);
6133 build_lflows(ctx
, &datapaths
, &ports
);
6135 sync_address_sets(ctx
);
6136 sync_dns_entries(ctx
, &datapaths
);
6138 struct ovn_datapath
*dp
, *next_dp
;
6139 HMAP_FOR_EACH_SAFE (dp
, next_dp
, key_node
, &datapaths
) {
6140 ovn_datapath_destroy(&datapaths
, dp
);
6142 hmap_destroy(&datapaths
);
6144 struct ovn_port
*port
, *next_port
;
6145 HMAP_FOR_EACH_SAFE (port
, next_port
, key_node
, &ports
) {
6146 ovn_port_destroy(&ports
, port
);
6148 hmap_destroy(&ports
);
6150 /* Copy nb_cfg from northbound to southbound database.
6152 * Also set up to update sb_cfg once our southbound transaction commits. */
6153 const struct nbrec_nb_global
*nb
= nbrec_nb_global_first(ctx
->ovnnb_idl
);
6155 nb
= nbrec_nb_global_insert(ctx
->ovnnb_txn
);
6157 const struct sbrec_sb_global
*sb
= sbrec_sb_global_first(ctx
->ovnsb_idl
);
6159 sb
= sbrec_sb_global_insert(ctx
->ovnsb_txn
);
6161 sbrec_sb_global_set_nb_cfg(sb
, nb
->nb_cfg
);
6162 sb_loop
->next_cfg
= nb
->nb_cfg
;
6164 cleanup_macam(&macam
);
6167 /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When
6168 * this column is not empty, it means we need to set the corresponding logical
6169 * port as 'up' in the northbound DB. */
6171 update_logical_port_status(struct northd_context
*ctx
)
6173 struct hmap lports_hmap
;
6174 const struct sbrec_port_binding
*sb
;
6175 const struct nbrec_logical_switch_port
*nbsp
;
6177 struct lport_hash_node
{
6178 struct hmap_node node
;
6179 const struct nbrec_logical_switch_port
*nbsp
;
6182 hmap_init(&lports_hmap
);
6184 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp
, ctx
->ovnnb_idl
) {
6185 hash_node
= xzalloc(sizeof *hash_node
);
6186 hash_node
->nbsp
= nbsp
;
6187 hmap_insert(&lports_hmap
, &hash_node
->node
, hash_string(nbsp
->name
, 0));
6190 SBREC_PORT_BINDING_FOR_EACH(sb
, ctx
->ovnsb_idl
) {
6192 HMAP_FOR_EACH_WITH_HASH(hash_node
, node
,
6193 hash_string(sb
->logical_port
, 0),
6195 if (!strcmp(sb
->logical_port
, hash_node
->nbsp
->name
)) {
6196 nbsp
= hash_node
->nbsp
;
6202 /* The logical port doesn't exist for this port binding. This can
6203 * happen under normal circumstances when ovn-northd hasn't gotten
6204 * around to pruning the Port_Binding yet. */
6208 bool up
= (sb
->chassis
|| !strcmp(nbsp
->type
, "router"));
6209 if (!nbsp
->up
|| *nbsp
->up
!= up
) {
6210 nbrec_logical_switch_port_set_up(nbsp
, &up
, 1);
6214 HMAP_FOR_EACH_POP(hash_node
, node
, &lports_hmap
) {
6217 hmap_destroy(&lports_hmap
);
/* DHCPv4 options that ovn-northd supports; synced into the southbound
 * DHCP_Options table by check_and_add_supported_dhcp_opts_to_sb_db() below.
 * NOTE(review): this span is a garbled extraction -- the array's leading
 * entries and closing brace appear to have been dropped (content numbering
 * jumps 6220->6224, 6231->6233, 6239->6242); verify against the complete
 * source before editing this list. */
6220 static struct gen_opts_map supported_dhcp_opts
[] = {
6224 DHCP_OPT_DNS_SERVER
,
6225 DHCP_OPT_LOG_SERVER
,
6226 DHCP_OPT_LPR_SERVER
,
6227 DHCP_OPT_SWAP_SERVER
,
6228 DHCP_OPT_POLICY_FILTER
,
6229 DHCP_OPT_ROUTER_SOLICITATION
,
6230 DHCP_OPT_NIS_SERVER
,
6231 DHCP_OPT_NTP_SERVER
,
6233 DHCP_OPT_TFTP_SERVER
,
6234 DHCP_OPT_CLASSLESS_STATIC_ROUTE
,
6235 DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE
,
6236 DHCP_OPT_IP_FORWARD_ENABLE
,
6237 DHCP_OPT_ROUTER_DISCOVERY
,
6238 DHCP_OPT_ETHERNET_ENCAP
,
6239 DHCP_OPT_DEFAULT_TTL
,
6242 DHCP_OPT_LEASE_TIME
,
/* DHCPv6 options that ovn-northd supports; synced into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db().
 * NOTE(review): garbled extraction -- entries and the closing brace appear
 * missing (content numbering jumps 6247->6249); confirm against the full
 * source. */
6247 static struct gen_opts_map supported_dhcpv6_opts
[] = {
6249 DHCPV6_OPT_SERVER_ID
,
6250 DHCPV6_OPT_DOMAIN_SEARCH
,
6251 DHCPV6_OPT_DNS_SERVER
6255 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context
*ctx
)
6257 struct hmap dhcp_opts_to_add
= HMAP_INITIALIZER(&dhcp_opts_to_add
);
6258 for (size_t i
= 0; (i
< sizeof(supported_dhcp_opts
) /
6259 sizeof(supported_dhcp_opts
[0])); i
++) {
6260 hmap_insert(&dhcp_opts_to_add
, &supported_dhcp_opts
[i
].hmap_node
,
6261 dhcp_opt_hash(supported_dhcp_opts
[i
].name
));
6264 const struct sbrec_dhcp_options
*opt_row
, *opt_row_next
;
6265 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row
, opt_row_next
, ctx
->ovnsb_idl
) {
6266 struct gen_opts_map
*dhcp_opt
=
6267 dhcp_opts_find(&dhcp_opts_to_add
, opt_row
->name
);
6269 hmap_remove(&dhcp_opts_to_add
, &dhcp_opt
->hmap_node
);
6271 sbrec_dhcp_options_delete(opt_row
);
6275 struct gen_opts_map
*opt
;
6276 HMAP_FOR_EACH (opt
, hmap_node
, &dhcp_opts_to_add
) {
6277 struct sbrec_dhcp_options
*sbrec_dhcp_option
=
6278 sbrec_dhcp_options_insert(ctx
->ovnsb_txn
);
6279 sbrec_dhcp_options_set_name(sbrec_dhcp_option
, opt
->name
);
6280 sbrec_dhcp_options_set_code(sbrec_dhcp_option
, opt
->code
);
6281 sbrec_dhcp_options_set_type(sbrec_dhcp_option
, opt
->type
);
6284 hmap_destroy(&dhcp_opts_to_add
);
6288 check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context
*ctx
)
6290 struct hmap dhcpv6_opts_to_add
= HMAP_INITIALIZER(&dhcpv6_opts_to_add
);
6291 for (size_t i
= 0; (i
< sizeof(supported_dhcpv6_opts
) /
6292 sizeof(supported_dhcpv6_opts
[0])); i
++) {
6293 hmap_insert(&dhcpv6_opts_to_add
, &supported_dhcpv6_opts
[i
].hmap_node
,
6294 dhcp_opt_hash(supported_dhcpv6_opts
[i
].name
));
6297 const struct sbrec_dhcpv6_options
*opt_row
, *opt_row_next
;
6298 SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row
, opt_row_next
, ctx
->ovnsb_idl
) {
6299 struct gen_opts_map
*dhcp_opt
=
6300 dhcp_opts_find(&dhcpv6_opts_to_add
, opt_row
->name
);
6302 hmap_remove(&dhcpv6_opts_to_add
, &dhcp_opt
->hmap_node
);
6304 sbrec_dhcpv6_options_delete(opt_row
);
6308 struct gen_opts_map
*opt
;
6309 HMAP_FOR_EACH(opt
, hmap_node
, &dhcpv6_opts_to_add
) {
6310 struct sbrec_dhcpv6_options
*sbrec_dhcpv6_option
=
6311 sbrec_dhcpv6_options_insert(ctx
->ovnsb_txn
);
6312 sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option
, opt
->name
);
6313 sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option
, opt
->code
);
6314 sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option
, opt
->type
);
6317 hmap_destroy(&dhcpv6_opts_to_add
);
/* Expected RBAC permissions for the "ovn-controller" role, per southbound
 * table.  The rbac_perm_cfg[] entries below are validated against (and, when
 * missing, inserted into) the SB RBAC_Permission table by
 * ovn_rbac_validate_perm() / ovn_rbac_create_perm() / check_and_update_rbac().
 * NOTE(review): garbled extraction -- several initializer strings, struct
 * fields (e.g. 'table', 'n_auth', 'n_update', 'insdel'), and the array
 * terminator appear to have been dropped; verify against the full source. */
6320 static const char *rbac_chassis_auth
[] =
6322 static const char *rbac_chassis_update
[] =
6323 {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};
6325 static const char *rbac_encap_auth
[] =
6327 static const char *rbac_encap_update
[] =
6328 {"type", "options", "ip"};
6330 static const char *rbac_port_binding_auth
[] =
6332 static const char *rbac_port_binding_update
[] =
6335 static const char *rbac_mac_binding_auth
[] =
6337 static const char *rbac_mac_binding_update
[] =
6338 {"logical_port", "ip", "mac", "datapath"};
/* Per-table permission template; 'row' caches the matching SB row, when
 * found, during check_and_update_rbac(). */
6340 static struct rbac_perm_cfg
{
6345 const char **update
;
6347 const struct sbrec_rbac_permission
*row
;
6348 } rbac_perm_cfg
[] = {
6351 .auth
= rbac_chassis_auth
,
6352 .n_auth
= ARRAY_SIZE(rbac_chassis_auth
),
6354 .update
= rbac_chassis_update
,
6355 .n_update
= ARRAY_SIZE(rbac_chassis_update
),
6359 .auth
= rbac_encap_auth
,
6360 .n_auth
= ARRAY_SIZE(rbac_encap_auth
),
6362 .update
= rbac_encap_update
,
6363 .n_update
= ARRAY_SIZE(rbac_encap_update
),
6366 .table
= "Port_Binding",
6367 .auth
= rbac_port_binding_auth
,
6368 .n_auth
= ARRAY_SIZE(rbac_port_binding_auth
),
6370 .update
= rbac_port_binding_update
,
6371 .n_update
= ARRAY_SIZE(rbac_port_binding_update
),
6374 .table
= "MAC_Binding",
6375 .auth
= rbac_mac_binding_auth
,
6376 .n_auth
= ARRAY_SIZE(rbac_mac_binding_auth
),
6378 .update
= rbac_mac_binding_update
,
6379 .n_update
= ARRAY_SIZE(rbac_mac_binding_update
),
6393 ovn_rbac_validate_perm(const struct sbrec_rbac_permission
*perm
)
6395 struct rbac_perm_cfg
*pcfg
;
6398 for (pcfg
= rbac_perm_cfg
; pcfg
->table
; pcfg
++) {
6399 if (!strcmp(perm
->table
, pcfg
->table
)) {
6406 if (perm
->n_authorization
!= pcfg
->n_auth
||
6407 perm
->n_update
!= pcfg
->n_update
) {
6410 if (perm
->insert_delete
!= pcfg
->insdel
) {
6413 /* verify perm->authorization vs. pcfg->auth */
6415 for (i
= 0; i
< pcfg
->n_auth
; i
++) {
6416 for (j
= 0; j
< perm
->n_authorization
; j
++) {
6417 if (!strcmp(pcfg
->auth
[i
], perm
->authorization
[j
])) {
6423 if (n_found
!= pcfg
->n_auth
) {
6427 /* verify perm->update vs. pcfg->update */
6429 for (i
= 0; i
< pcfg
->n_update
; i
++) {
6430 for (j
= 0; j
< perm
->n_update
; j
++) {
6431 if (!strcmp(pcfg
->update
[i
], perm
->update
[j
])) {
6437 if (n_found
!= pcfg
->n_update
) {
6441 /* Success, db state matches expected state */
6447 ovn_rbac_create_perm(struct rbac_perm_cfg
*pcfg
,
6448 struct northd_context
*ctx
,
6449 const struct sbrec_rbac_role
*rbac_role
)
6451 struct sbrec_rbac_permission
*rbac_perm
;
6453 rbac_perm
= sbrec_rbac_permission_insert(ctx
->ovnsb_txn
);
6454 sbrec_rbac_permission_set_table(rbac_perm
, pcfg
->table
);
6455 sbrec_rbac_permission_set_authorization(rbac_perm
,
6458 sbrec_rbac_permission_set_insert_delete(rbac_perm
, pcfg
->insdel
);
6459 sbrec_rbac_permission_set_update(rbac_perm
,
6462 sbrec_rbac_role_update_permissions_setkey(rbac_role
, pcfg
->table
,
6467 check_and_update_rbac(struct northd_context
*ctx
)
6469 const struct sbrec_rbac_role
*rbac_role
= NULL
;
6470 const struct sbrec_rbac_permission
*perm_row
, *perm_next
;
6471 const struct sbrec_rbac_role
*role_row
, *role_row_next
;
6472 struct rbac_perm_cfg
*pcfg
;
6474 for (pcfg
= rbac_perm_cfg
; pcfg
->table
; pcfg
++) {
6478 SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row
, perm_next
, ctx
->ovnsb_idl
) {
6479 if (!ovn_rbac_validate_perm(perm_row
)) {
6480 sbrec_rbac_permission_delete(perm_row
);
6483 SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row
, role_row_next
, ctx
->ovnsb_idl
) {
6484 if (strcmp(role_row
->name
, "ovn-controller")) {
6485 sbrec_rbac_role_delete(role_row
);
6487 rbac_role
= role_row
;
6492 rbac_role
= sbrec_rbac_role_insert(ctx
->ovnsb_txn
);
6493 sbrec_rbac_role_set_name(rbac_role
, "ovn-controller");
6496 for (pcfg
= rbac_perm_cfg
; pcfg
->table
; pcfg
++) {
6498 ovn_rbac_create_perm(pcfg
, ctx
, rbac_role
);
6503 /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */
6505 update_northbound_cfg(struct northd_context
*ctx
,
6506 struct ovsdb_idl_loop
*sb_loop
)
6508 /* Update northbound sb_cfg if appropriate. */
6509 const struct nbrec_nb_global
*nbg
= nbrec_nb_global_first(ctx
->ovnnb_idl
);
6510 int64_t sb_cfg
= sb_loop
->cur_cfg
;
6511 if (nbg
&& sb_cfg
&& nbg
->sb_cfg
!= sb_cfg
) {
6512 nbrec_nb_global_set_sb_cfg(nbg
, sb_cfg
);
6515 /* Update northbound hv_cfg if appropriate. */
6517 /* Find minimum nb_cfg among all chassis. */
6518 const struct sbrec_chassis
*chassis
;
6519 int64_t hv_cfg
= nbg
->nb_cfg
;
6520 SBREC_CHASSIS_FOR_EACH (chassis
, ctx
->ovnsb_idl
) {
6521 if (chassis
->nb_cfg
< hv_cfg
) {
6522 hv_cfg
= chassis
->nb_cfg
;
6526 /* Update hv_cfg. */
6527 if (nbg
->hv_cfg
!= hv_cfg
) {
6528 nbrec_nb_global_set_hv_cfg(nbg
, hv_cfg
);
6533 /* Handle a fairly small set of changes in the southbound database. */
6535 ovnsb_db_run(struct northd_context
*ctx
, struct ovsdb_idl_loop
*sb_loop
)
6537 if (!ctx
->ovnnb_txn
|| !ovsdb_idl_has_ever_connected(ctx
->ovnsb_idl
)) {
6541 update_logical_port_status(ctx
);
6542 update_northbound_cfg(ctx
, sb_loop
);
6546 parse_options(int argc OVS_UNUSED
, char *argv
[] OVS_UNUSED
)
6549 DAEMON_OPTION_ENUMS
,
6553 static const struct option long_options
[] = {
6554 {"ovnsb-db", required_argument
, NULL
, 'd'},
6555 {"ovnnb-db", required_argument
, NULL
, 'D'},
6556 {"help", no_argument
, NULL
, 'h'},
6557 {"options", no_argument
, NULL
, 'o'},
6558 {"version", no_argument
, NULL
, 'V'},
6559 DAEMON_LONG_OPTIONS
,
6561 STREAM_SSL_LONG_OPTIONS
,
6564 char *short_options
= ovs_cmdl_long_options_to_short_options(long_options
);
6569 c
= getopt_long(argc
, argv
, short_options
, long_options
, NULL
);
6575 DAEMON_OPTION_HANDLERS
;
6576 VLOG_OPTION_HANDLERS
;
6577 STREAM_SSL_OPTION_HANDLERS
;
6592 ovs_cmdl_print_options(long_options
);
6596 ovs_print_version(0, 0);
6605 ovnsb_db
= default_sb_db();
6609 ovnnb_db
= default_nb_db();
6612 free(short_options
);
/* Registers 'column' with the IDL 'idl' for monitoring, but suppresses
 * change alerts for it: ovn-northd writes these columns itself and must
 * not be woken up by the echo of its own updates. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
6624 main(int argc
, char *argv
[])
6626 int res
= EXIT_SUCCESS
;
6627 struct unixctl_server
*unixctl
;
6631 fatal_ignore_sigpipe();
6632 ovs_cmdl_proctitle_init(argc
, argv
);
6633 set_program_name(argv
[0]);
6634 service_start(&argc
, &argv
);
6635 parse_options(argc
, argv
);
6637 daemonize_start(false);
6639 retval
= unixctl_server_create(NULL
, &unixctl
);
6643 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit
, &exiting
);
6645 daemonize_complete();
6647 /* We want to detect (almost) all changes to the ovn-nb db. */
6648 struct ovsdb_idl_loop ovnnb_idl_loop
= OVSDB_IDL_LOOP_INITIALIZER(
6649 ovsdb_idl_create(ovnnb_db
, &nbrec_idl_class
, true, true));
6650 ovsdb_idl_omit_alert(ovnnb_idl_loop
.idl
, &nbrec_nb_global_col_sb_cfg
);
6651 ovsdb_idl_omit_alert(ovnnb_idl_loop
.idl
, &nbrec_nb_global_col_hv_cfg
);
6653 /* We want to detect only selected changes to the ovn-sb db. */
6654 struct ovsdb_idl_loop ovnsb_idl_loop
= OVSDB_IDL_LOOP_INITIALIZER(
6655 ovsdb_idl_create(ovnsb_db
, &sbrec_idl_class
, false, true));
6657 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_sb_global
);
6658 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_sb_global_col_nb_cfg
);
6660 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_logical_flow
);
6661 add_column_noalert(ovnsb_idl_loop
.idl
,
6662 &sbrec_logical_flow_col_logical_datapath
);
6663 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_pipeline
);
6664 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_table_id
);
6665 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_priority
);
6666 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_match
);
6667 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_logical_flow_col_actions
);
6669 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_multicast_group
);
6670 add_column_noalert(ovnsb_idl_loop
.idl
,
6671 &sbrec_multicast_group_col_datapath
);
6672 add_column_noalert(ovnsb_idl_loop
.idl
,
6673 &sbrec_multicast_group_col_tunnel_key
);
6674 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_multicast_group_col_name
);
6675 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_multicast_group_col_ports
);
6677 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_datapath_binding
);
6678 add_column_noalert(ovnsb_idl_loop
.idl
,
6679 &sbrec_datapath_binding_col_tunnel_key
);
6680 add_column_noalert(ovnsb_idl_loop
.idl
,
6681 &sbrec_datapath_binding_col_external_ids
);
6683 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_port_binding
);
6684 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_datapath
);
6685 add_column_noalert(ovnsb_idl_loop
.idl
,
6686 &sbrec_port_binding_col_logical_port
);
6687 add_column_noalert(ovnsb_idl_loop
.idl
,
6688 &sbrec_port_binding_col_tunnel_key
);
6689 add_column_noalert(ovnsb_idl_loop
.idl
,
6690 &sbrec_port_binding_col_parent_port
);
6691 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_tag
);
6692 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_type
);
6693 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_options
);
6694 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_mac
);
6695 add_column_noalert(ovnsb_idl_loop
.idl
,
6696 &sbrec_port_binding_col_nat_addresses
);
6697 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
, &sbrec_port_binding_col_chassis
);
6698 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
,
6699 &sbrec_port_binding_col_gateway_chassis
);
6700 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
,
6701 &sbrec_gateway_chassis_col_chassis
);
6702 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
, &sbrec_gateway_chassis_col_name
);
6703 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
,
6704 &sbrec_gateway_chassis_col_priority
);
6705 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
,
6706 &sbrec_gateway_chassis_col_external_ids
);
6707 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
,
6708 &sbrec_gateway_chassis_col_options
);
6709 add_column_noalert(ovnsb_idl_loop
.idl
,
6710 &sbrec_port_binding_col_external_ids
);
6711 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_mac_binding
);
6712 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_mac_binding_col_datapath
);
6713 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_mac_binding_col_ip
);
6714 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_mac_binding_col_mac
);
6715 add_column_noalert(ovnsb_idl_loop
.idl
,
6716 &sbrec_mac_binding_col_logical_port
);
6717 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_dhcp_options
);
6718 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dhcp_options_col_code
);
6719 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dhcp_options_col_type
);
6720 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dhcp_options_col_name
);
6721 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_dhcpv6_options
);
6722 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dhcpv6_options_col_code
);
6723 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dhcpv6_options_col_type
);
6724 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dhcpv6_options_col_name
);
6725 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_address_set
);
6726 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_address_set_col_name
);
6727 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_address_set_col_addresses
);
6729 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_dns
);
6730 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dns_col_datapaths
);
6731 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dns_col_records
);
6732 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_dns_col_external_ids
);
6734 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_rbac_role
);
6735 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_rbac_role_col_name
);
6736 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_rbac_role_col_permissions
);
6738 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_rbac_permission
);
6739 add_column_noalert(ovnsb_idl_loop
.idl
,
6740 &sbrec_rbac_permission_col_table
);
6741 add_column_noalert(ovnsb_idl_loop
.idl
,
6742 &sbrec_rbac_permission_col_authorization
);
6743 add_column_noalert(ovnsb_idl_loop
.idl
,
6744 &sbrec_rbac_permission_col_insert_delete
);
6745 add_column_noalert(ovnsb_idl_loop
.idl
, &sbrec_rbac_permission_col_update
);
6747 ovsdb_idl_add_table(ovnsb_idl_loop
.idl
, &sbrec_table_chassis
);
6748 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
, &sbrec_chassis_col_nb_cfg
);
6749 ovsdb_idl_add_column(ovnsb_idl_loop
.idl
, &sbrec_chassis_col_name
);
6751 /* Ensure that only a single ovn-northd is active in the deployment by
6752 * acquiring a lock called "ovn_northd" on the southbound database
6753 * and then only performing DB transactions if the lock is held. */
6754 ovsdb_idl_set_lock(ovnsb_idl_loop
.idl
, "ovn_northd");
6755 bool had_lock
= false;
6760 struct northd_context ctx
= {
6761 .ovnnb_idl
= ovnnb_idl_loop
.idl
,
6762 .ovnnb_txn
= ovsdb_idl_loop_run(&ovnnb_idl_loop
),
6763 .ovnsb_idl
= ovnsb_idl_loop
.idl
,
6764 .ovnsb_txn
= ovsdb_idl_loop_run(&ovnsb_idl_loop
),
6767 if (!had_lock
&& ovsdb_idl_has_lock(ovnsb_idl_loop
.idl
)) {
6768 VLOG_INFO("ovn-northd lock acquired. "
6769 "This ovn-northd instance is now active.");
6771 } else if (had_lock
&& !ovsdb_idl_has_lock(ovnsb_idl_loop
.idl
)) {
6772 VLOG_INFO("ovn-northd lock lost. "
6773 "This ovn-northd instance is now on standby.");
6777 struct chassis_index chassis_index
;
6778 bool destroy_chassis_index
= false;
6779 if (ovsdb_idl_has_lock(ovnsb_idl_loop
.idl
)) {
6780 chassis_index_init(&chassis_index
, ctx
.ovnsb_idl
);
6781 destroy_chassis_index
= true;
6783 ovnnb_db_run(&ctx
, &chassis_index
, &ovnsb_idl_loop
);
6784 ovnsb_db_run(&ctx
, &ovnsb_idl_loop
);
6785 if (ctx
.ovnsb_txn
) {
6786 check_and_add_supported_dhcp_opts_to_sb_db(&ctx
);
6787 check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx
);
6788 check_and_update_rbac(&ctx
);
6792 unixctl_server_run(unixctl
);
6793 unixctl_server_wait(unixctl
);
6795 poll_immediate_wake();
6797 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop
);
6798 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop
);
6801 if (should_service_stop()) {
6805 if (destroy_chassis_index
) {
6806 chassis_index_destroy(&chassis_index
);
6810 unixctl_server_destroy(unixctl
);
6811 ovsdb_idl_loop_destroy(&ovnnb_idl_loop
);
6812 ovsdb_idl_loop_destroy(&ovnsb_idl_loop
);
6819 ovn_northd_exit(struct unixctl_conn
*conn
, int argc OVS_UNUSED
,
6820 const char *argv
[] OVS_UNUSED
, void *exiting_
)
6822 bool *exiting
= exiting_
;
6825 unixctl_command_reply(conn
, NULL
);