/* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
 * Copyright (c) 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
24 #include "dp-packet.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
52 #include "vswitch-idl.h"
VLOG_DEFINE_THIS_MODULE(pinctrl);

/* OpenFlow connection to the switch. */
static struct rconn *swconn;

/* Last seen sequence number for 'swconn'.  When this differs from
 * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
static unsigned int conn_seq_no;
64 static void init_buffered_packets_map(void);
65 static void destroy_buffered_packets_map(void);
67 static void pinctrl_handle_put_mac_binding(const struct flow
*md
,
68 const struct flow
*headers
,
70 static void init_put_mac_bindings(void);
71 static void destroy_put_mac_bindings(void);
72 static void run_put_mac_bindings(
73 struct ovsdb_idl_txn
*ovnsb_idl_txn
,
74 struct ovsdb_idl_index
*sbrec_datapath_binding_by_key
,
75 struct ovsdb_idl_index
*sbrec_port_binding_by_key
,
76 struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip
);
77 static void wait_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn
);
78 static void flush_put_mac_bindings(void);
80 static void init_send_garps(void);
81 static void destroy_send_garps(void);
82 static void send_garp_wait(void);
83 static void send_garp_run(
84 struct ovsdb_idl_index
*sbrec_chassis_by_name
,
85 struct ovsdb_idl_index
*sbrec_port_binding_by_datapath
,
86 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
87 const struct ovsrec_bridge
*,
88 const struct sbrec_chassis
*,
89 const struct hmap
*local_datapaths
,
90 const struct sset
*active_tunnels
);
91 static void pinctrl_handle_nd_na(const struct flow
*ip_flow
,
92 const struct match
*md
,
93 struct ofpbuf
*userdata
,
95 static void reload_metadata(struct ofpbuf
*ofpacts
,
96 const struct match
*md
);
97 static void pinctrl_handle_put_nd_ra_opts(
98 const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
99 struct ofputil_packet_in
*pin
, struct ofpbuf
*userdata
,
100 struct ofpbuf
*continuation
);
101 static void pinctrl_handle_nd_ns(const struct flow
*ip_flow
,
102 struct dp_packet
*pkt_in
,
103 const struct match
*md
,
104 struct ofpbuf
*userdata
);
105 static void init_ipv6_ras(void);
106 static void destroy_ipv6_ras(void);
107 static void ipv6_ra_wait(void);
108 static void send_ipv6_ras(
109 struct ovsdb_idl_index
*sbrec_port_binding_by_datapath
,
110 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
111 const struct hmap
*local_datapaths
);
/* Coverage counters.  Within the code visible here,
 * 'pinctrl_drop_buffered_packets_map' is bumped when a packet cannot be
 * buffered because the buffered-packets map is full. */
COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
120 swconn
= rconn_create(5, 0, DSCP_DEFAULT
, 1 << OFP13_VERSION
);
122 init_put_mac_bindings();
125 init_buffered_packets_map();
129 queue_msg(struct ofpbuf
*msg
)
131 const struct ofp_header
*oh
= msg
->data
;
132 ovs_be32 xid
= oh
->xid
;
134 rconn_send(swconn
, msg
, NULL
);
138 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
142 /* Fetch the switch configuration. The response later will allow us to
143 * change the miss_send_len to UINT16_MAX, so that we can enable
144 * asynchronous messages. */
145 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST
,
146 rconn_get_version(swconn
), 0));
148 /* Set a packet-in format that supports userdata. */
149 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn
),
150 OFPUTIL_PACKET_IN_NXT2
));
154 set_switch_config(struct rconn
*swconn_
,
155 const struct ofputil_switch_config
*config
)
157 enum ofp_version version
= rconn_get_version(swconn_
);
158 struct ofpbuf
*request
= ofputil_encode_set_config(config
, version
);
163 set_actions_and_enqueue_msg(const struct dp_packet
*packet
,
164 const struct match
*md
,
165 struct ofpbuf
*userdata
)
167 /* Copy metadata from 'md' into the packet-out via "set_field"
168 * actions, then add actions from 'userdata'.
170 uint64_t ofpacts_stub
[4096 / 8];
171 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
172 enum ofp_version version
= rconn_get_version(swconn
);
174 reload_metadata(&ofpacts
, md
);
175 enum ofperr error
= ofpacts_pull_openflow_actions(userdata
, userdata
->size
,
179 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
180 VLOG_WARN_RL(&rl
, "failed to parse actions from userdata (%s)",
181 ofperr_to_string(error
));
182 ofpbuf_uninit(&ofpacts
);
186 struct ofputil_packet_out po
= {
187 .packet
= dp_packet_data(packet
),
188 .packet_len
= dp_packet_size(packet
),
189 .buffer_id
= UINT32_MAX
,
190 .ofpacts
= ofpacts
.data
,
191 .ofpacts_len
= ofpacts
.size
,
193 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
194 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
195 queue_msg(ofputil_encode_packet_out(&po
, proto
));
196 ofpbuf_uninit(&ofpacts
);
200 struct ofpbuf ofpacts
;
204 #define BUFFER_QUEUE_DEPTH 4
205 struct buffered_packets
{
206 struct hmap_node hmap_node
;
211 long long int timestamp
;
213 struct buffer_info data
[BUFFER_QUEUE_DEPTH
];
217 static struct hmap buffered_packets_map
;
220 init_buffered_packets_map(void)
222 hmap_init(&buffered_packets_map
);
226 destroy_buffered_packets(struct buffered_packets
*bp
)
228 struct buffer_info
*bi
;
230 while (bp
->head
!= bp
->tail
) {
231 bi
= &bp
->data
[bp
->head
];
232 dp_packet_delete(bi
->p
);
233 ofpbuf_uninit(&bi
->ofpacts
);
235 bp
->head
= (bp
->head
+ 1) % BUFFER_QUEUE_DEPTH
;
237 hmap_remove(&buffered_packets_map
, &bp
->hmap_node
);
242 destroy_buffered_packets_map(void)
244 struct buffered_packets
*bp
, *next
;
245 HMAP_FOR_EACH_SAFE (bp
, next
, hmap_node
, &buffered_packets_map
) {
246 destroy_buffered_packets(bp
);
248 hmap_destroy(&buffered_packets_map
);
252 buffered_push_packet(struct buffered_packets
*bp
,
253 struct dp_packet
*packet
,
254 const struct match
*md
)
256 uint32_t next
= (bp
->tail
+ 1) % BUFFER_QUEUE_DEPTH
;
257 struct buffer_info
*bi
= &bp
->data
[bp
->tail
];
259 ofpbuf_init(&bi
->ofpacts
, 4096);
261 reload_metadata(&bi
->ofpacts
, md
);
262 struct ofpact_resubmit
*resubmit
= ofpact_put_RESUBMIT(&bi
->ofpacts
);
263 resubmit
->in_port
= OFPP_CONTROLLER
;
264 resubmit
->table_id
= OFTABLE_REMOTE_OUTPUT
;
268 if (next
== bp
->head
) {
269 bi
= &bp
->data
[bp
->head
];
270 dp_packet_delete(bi
->p
);
271 ofpbuf_uninit(&bi
->ofpacts
);
272 bp
->head
= (bp
->head
+ 1) % BUFFER_QUEUE_DEPTH
;
278 buffered_send_packets(struct buffered_packets
*bp
, struct eth_addr
*addr
)
280 enum ofp_version version
= rconn_get_version(swconn
);
281 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
283 while (bp
->head
!= bp
->tail
) {
284 struct buffer_info
*bi
= &bp
->data
[bp
->head
];
285 struct eth_header
*eth
= dp_packet_data(bi
->p
);
287 eth
->eth_dst
= *addr
;
288 struct ofputil_packet_out po
= {
289 .packet
= dp_packet_data(bi
->p
),
290 .packet_len
= dp_packet_size(bi
->p
),
291 .buffer_id
= UINT32_MAX
,
292 .ofpacts
= bi
->ofpacts
.data
,
293 .ofpacts_len
= bi
->ofpacts
.size
,
295 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
296 queue_msg(ofputil_encode_packet_out(&po
, proto
));
298 ofpbuf_uninit(&bi
->ofpacts
);
299 dp_packet_delete(bi
->p
);
301 bp
->head
= (bp
->head
+ 1) % BUFFER_QUEUE_DEPTH
;
305 #define BUFFER_MAP_TIMEOUT 10000
307 buffered_packets_map_gc(void)
309 struct buffered_packets
*cur_qp
, *next_qp
;
310 long long int now
= time_msec();
312 HMAP_FOR_EACH_SAFE (cur_qp
, next_qp
, hmap_node
, &buffered_packets_map
) {
313 if (now
> cur_qp
->timestamp
+ BUFFER_MAP_TIMEOUT
) {
314 destroy_buffered_packets(cur_qp
);
319 static struct buffered_packets
*
320 pinctrl_find_buffered_packets(const struct in6_addr
*ip
, uint32_t hash
)
322 struct buffered_packets
*qp
;
324 HMAP_FOR_EACH_WITH_HASH (qp
, hmap_node
, hash
,
325 &buffered_packets_map
) {
326 if (IN6_ARE_ADDR_EQUAL(&qp
->ip
, ip
)) {
334 pinctrl_handle_buffered_packets(const struct flow
*ip_flow
,
335 struct dp_packet
*pkt_in
,
336 const struct match
*md
, bool is_arp
)
338 struct buffered_packets
*bp
;
339 struct dp_packet
*clone
;
340 struct in6_addr addr
;
343 addr
= in6_addr_mapped_ipv4(ip_flow
->nw_dst
);
345 addr
= ip_flow
->ipv6_dst
;
348 uint32_t hash
= hash_bytes(&addr
, sizeof addr
, 0);
349 bp
= pinctrl_find_buffered_packets(&addr
, hash
);
351 if (hmap_count(&buffered_packets_map
) >= 1000) {
352 COVERAGE_INC(pinctrl_drop_buffered_packets_map
);
356 bp
= xmalloc(sizeof *bp
);
357 hmap_insert(&buffered_packets_map
, &bp
->hmap_node
, hash
);
358 bp
->head
= bp
->tail
= 0;
361 bp
->timestamp
= time_msec();
362 /* clone the packet to send it later with correct L2 address */
363 clone
= dp_packet_clone_data(dp_packet_data(pkt_in
),
364 dp_packet_size(pkt_in
));
365 buffered_push_packet(bp
, clone
, md
);
371 pinctrl_handle_arp(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
372 const struct match
*md
, struct ofpbuf
*userdata
)
374 /* This action only works for IP packets, and the switch should only send
375 * us IP packets this way, but check here just to be sure. */
376 if (ip_flow
->dl_type
!= htons(ETH_TYPE_IP
)) {
377 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
378 VLOG_WARN_RL(&rl
, "ARP action on non-IP packet (Ethertype %"PRIx16
")",
379 ntohs(ip_flow
->dl_type
));
383 pinctrl_handle_buffered_packets(ip_flow
, pkt_in
, md
, true);
385 /* Compose an ARP packet. */
386 uint64_t packet_stub
[128 / 8];
387 struct dp_packet packet
;
388 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
389 compose_arp__(&packet
);
391 struct eth_header
*eth
= dp_packet_eth(&packet
);
392 eth
->eth_dst
= ip_flow
->dl_dst
;
393 eth
->eth_src
= ip_flow
->dl_src
;
395 struct arp_eth_header
*arp
= dp_packet_l3(&packet
);
396 arp
->ar_op
= htons(ARP_OP_REQUEST
);
397 arp
->ar_sha
= ip_flow
->dl_src
;
398 put_16aligned_be32(&arp
->ar_spa
, ip_flow
->nw_src
);
399 arp
->ar_tha
= eth_addr_zero
;
400 put_16aligned_be32(&arp
->ar_tpa
, ip_flow
->nw_dst
);
402 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
403 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
404 ip_flow
->vlans
[0].tci
);
407 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
408 dp_packet_uninit(&packet
);
412 pinctrl_handle_icmp(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
413 const struct match
*md
, struct ofpbuf
*userdata
)
415 /* This action only works for IP packets, and the switch should only send
416 * us IP packets this way, but check here just to be sure. */
417 if (ip_flow
->dl_type
!= htons(ETH_TYPE_IP
) &&
418 ip_flow
->dl_type
!= htons(ETH_TYPE_IPV6
)) {
419 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
421 "ICMP action on non-IP packet (eth_type 0x%"PRIx16
")",
422 ntohs(ip_flow
->dl_type
));
426 uint64_t packet_stub
[128 / 8];
427 struct dp_packet packet
;
429 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
430 dp_packet_clear(&packet
);
431 packet
.packet_type
= htonl(PT_ETH
);
433 struct eth_header
*eh
= dp_packet_put_zeros(&packet
, sizeof *eh
);
434 eh
->eth_dst
= ip_flow
->dl_dst
;
435 eh
->eth_src
= ip_flow
->dl_src
;
437 if (get_dl_type(ip_flow
) == htons(ETH_TYPE_IP
)) {
438 struct ip_header
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
440 eh
->eth_type
= htons(ETH_TYPE_IP
);
441 dp_packet_set_l3(&packet
, nh
);
442 nh
->ip_ihl_ver
= IP_IHL_VER(5, 4);
443 nh
->ip_tot_len
= htons(sizeof(struct ip_header
) +
444 sizeof(struct icmp_header
));
445 nh
->ip_proto
= IPPROTO_ICMP
;
446 nh
->ip_frag_off
= htons(IP_DF
);
447 packet_set_ipv4(&packet
, ip_flow
->nw_src
, ip_flow
->nw_dst
,
448 ip_flow
->nw_tos
, 255);
450 struct icmp_header
*ih
= dp_packet_put_zeros(&packet
, sizeof *ih
);
451 dp_packet_set_l4(&packet
, ih
);
452 packet_set_icmp(&packet
, ICMP4_DST_UNREACH
, 1);
454 struct ip6_hdr
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
455 struct icmp6_error_header
*ih
;
456 uint32_t icmpv6_csum
;
458 eh
->eth_type
= htons(ETH_TYPE_IPV6
);
459 dp_packet_set_l3(&packet
, nh
);
461 nh
->ip6_nxt
= IPPROTO_ICMPV6
;
462 nh
->ip6_plen
= htons(sizeof(*nh
) + ICMP6_ERROR_HEADER_LEN
);
463 packet_set_ipv6(&packet
, &ip_flow
->ipv6_src
, &ip_flow
->ipv6_dst
,
464 ip_flow
->nw_tos
, ip_flow
->ipv6_label
, 255);
466 ih
= dp_packet_put_zeros(&packet
, sizeof *ih
);
467 dp_packet_set_l4(&packet
, ih
);
468 ih
->icmp6_base
.icmp6_type
= ICMP6_DST_UNREACH
;
469 ih
->icmp6_base
.icmp6_code
= 1;
470 ih
->icmp6_base
.icmp6_cksum
= 0;
472 uint8_t *data
= dp_packet_put_zeros(&packet
, sizeof *nh
);
473 memcpy(data
, dp_packet_l3(pkt_in
), sizeof(*nh
));
475 icmpv6_csum
= packet_csum_pseudoheader6(dp_packet_l3(&packet
));
476 ih
->icmp6_base
.icmp6_cksum
= csum_finish(
477 csum_continue(icmpv6_csum
, ih
,
478 sizeof(*nh
) + ICMP6_ERROR_HEADER_LEN
));
481 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
482 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
483 ip_flow
->vlans
[0].tci
);
486 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
487 dp_packet_uninit(&packet
);
491 pinctrl_handle_tcp_reset(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
492 const struct match
*md
, struct ofpbuf
*userdata
)
494 /* This action only works for TCP segments, and the switch should only send
495 * us TCP segments this way, but check here just to be sure. */
496 if (ip_flow
->nw_proto
!= IPPROTO_TCP
) {
497 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
498 VLOG_WARN_RL(&rl
, "TCP_RESET action on non-TCP packet");
502 uint64_t packet_stub
[128 / 8];
503 struct dp_packet packet
;
505 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
506 dp_packet_clear(&packet
);
507 packet
.packet_type
= htonl(PT_ETH
);
509 struct eth_header
*eh
= dp_packet_put_zeros(&packet
, sizeof *eh
);
510 eh
->eth_dst
= ip_flow
->dl_dst
;
511 eh
->eth_src
= ip_flow
->dl_src
;
513 if (get_dl_type(ip_flow
) == htons(ETH_TYPE_IPV6
)) {
514 struct ip6_hdr
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
516 eh
->eth_type
= htons(ETH_TYPE_IPV6
);
517 dp_packet_set_l3(&packet
, nh
);
519 nh
->ip6_nxt
= IPPROTO_TCP
;
520 nh
->ip6_plen
= htons(TCP_HEADER_LEN
);
521 packet_set_ipv6(&packet
, &ip_flow
->ipv6_src
, &ip_flow
->ipv6_dst
,
522 ip_flow
->nw_tos
, ip_flow
->ipv6_label
, 255);
524 struct ip_header
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
526 eh
->eth_type
= htons(ETH_TYPE_IP
);
527 dp_packet_set_l3(&packet
, nh
);
528 nh
->ip_ihl_ver
= IP_IHL_VER(5, 4);
529 nh
->ip_tot_len
= htons(IP_HEADER_LEN
+ TCP_HEADER_LEN
);
530 nh
->ip_proto
= IPPROTO_TCP
;
531 nh
->ip_frag_off
= htons(IP_DF
);
532 packet_set_ipv4(&packet
, ip_flow
->nw_src
, ip_flow
->nw_dst
,
533 ip_flow
->nw_tos
, 255);
536 struct tcp_header
*th
= dp_packet_put_zeros(&packet
, sizeof *th
);
537 struct tcp_header
*tcp_in
= dp_packet_l4(pkt_in
);
538 dp_packet_set_l4(&packet
, th
);
539 th
->tcp_ctl
= TCP_CTL(TCP_RST
, 5);
540 if (ip_flow
->tcp_flags
& htons(TCP_ACK
)) {
541 th
->tcp_seq
= tcp_in
->tcp_ack
;
543 uint32_t tcp_seq
, ack_seq
, tcp_len
;
545 tcp_seq
= ntohl(get_16aligned_be32(&tcp_in
->tcp_seq
));
546 tcp_len
= TCP_OFFSET(tcp_in
->tcp_ctl
) * 4;
547 ack_seq
= tcp_seq
+ dp_packet_l4_size(pkt_in
) - tcp_len
;
548 put_16aligned_be32(&th
->tcp_ack
, htonl(ack_seq
));
549 put_16aligned_be32(&th
->tcp_seq
, 0);
551 packet_set_tcp_port(&packet
, ip_flow
->tp_dst
, ip_flow
->tp_src
);
553 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
554 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
555 ip_flow
->vlans
[0].tci
);
558 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
559 dp_packet_uninit(&packet
);
563 pinctrl_handle_put_dhcp_opts(
564 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
565 struct ofpbuf
*userdata
, struct ofpbuf
*continuation
)
567 enum ofp_version version
= rconn_get_version(swconn
);
568 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
569 struct dp_packet
*pkt_out_ptr
= NULL
;
570 uint32_t success
= 0;
572 /* Parse result field. */
573 const struct mf_field
*f
;
574 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
576 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
577 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
581 /* Parse result offset and offer IP. */
582 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
583 ovs_be32
*offer_ip
= ofpbuf_try_pull(userdata
, sizeof *offer_ip
);
584 if (!ofsp
|| !offer_ip
) {
585 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
586 VLOG_WARN_RL(&rl
, "offset or offer_ip not present in the userdata");
590 /* Check that the result is valid and writable. */
591 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
592 ofperr
= mf_check_dst(&dst
, NULL
);
594 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
595 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
599 if (!userdata
->size
) {
600 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
601 VLOG_WARN_RL(&rl
, "DHCP options not present in the userdata");
605 /* Validate the DHCP request packet.
606 * Format of the DHCP packet is
607 * ------------------------------------------------------------------------
608 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
609 * ------------------------------------------------------------------------
612 const char *end
= (char *)dp_packet_l4(pkt_in
) + dp_packet_l4_size(pkt_in
);
613 const char *in_dhcp_ptr
= dp_packet_get_udp_payload(pkt_in
);
615 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
616 VLOG_WARN_RL(&rl
, "Invalid or incomplete DHCP packet received");
620 const struct dhcp_header
*in_dhcp_data
621 = (const struct dhcp_header
*) in_dhcp_ptr
;
622 in_dhcp_ptr
+= sizeof *in_dhcp_data
;
623 if (in_dhcp_ptr
> end
) {
624 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
625 VLOG_WARN_RL(&rl
, "Invalid or incomplete DHCP packet received, "
629 if (in_dhcp_data
->op
!= DHCP_OP_REQUEST
) {
630 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
631 VLOG_WARN_RL(&rl
, "Invalid opcode in the DHCP packet: %d",
636 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
637 * options is the DHCP magic cookie followed by the actual DHCP options.
639 ovs_be32 magic_cookie
= htonl(DHCP_MAGIC_COOKIE
);
640 if (in_dhcp_ptr
+ sizeof magic_cookie
> end
||
641 get_unaligned_be32((const void *) in_dhcp_ptr
) != magic_cookie
) {
642 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
643 VLOG_WARN_RL(&rl
, "DHCP magic cookie not present in the DHCP packet");
646 in_dhcp_ptr
+= sizeof magic_cookie
;
648 const uint8_t *in_dhcp_msg_type
= NULL
;
649 ovs_be32 request_ip
= in_dhcp_data
->ciaddr
;
650 while (in_dhcp_ptr
< end
) {
651 const struct dhcp_opt_header
*in_dhcp_opt
=
652 (const struct dhcp_opt_header
*)in_dhcp_ptr
;
653 if (in_dhcp_opt
->code
== DHCP_OPT_END
) {
656 if (in_dhcp_opt
->code
== DHCP_OPT_PAD
) {
660 in_dhcp_ptr
+= sizeof *in_dhcp_opt
;
661 if (in_dhcp_ptr
> end
) {
664 in_dhcp_ptr
+= in_dhcp_opt
->len
;
665 if (in_dhcp_ptr
> end
) {
669 switch (in_dhcp_opt
->code
) {
670 case DHCP_OPT_MSG_TYPE
:
671 if (in_dhcp_opt
->len
== 1) {
672 in_dhcp_msg_type
= DHCP_OPT_PAYLOAD(in_dhcp_opt
);
675 case DHCP_OPT_REQ_IP
:
676 if (in_dhcp_opt
->len
== 4) {
677 request_ip
= get_unaligned_be32(DHCP_OPT_PAYLOAD(in_dhcp_opt
));
685 /* Check that the DHCP Message Type (opt 53) is present or not with
686 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST.
688 if (!in_dhcp_msg_type
) {
689 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
690 VLOG_WARN_RL(&rl
, "Missing DHCP message type");
693 if (*in_dhcp_msg_type
!= DHCP_MSG_DISCOVER
&&
694 *in_dhcp_msg_type
!= DHCP_MSG_REQUEST
) {
695 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
696 VLOG_WARN_RL(&rl
, "Invalid DHCP message type: %d", *in_dhcp_msg_type
);
701 if (*in_dhcp_msg_type
== DHCP_MSG_DISCOVER
) {
702 msg_type
= DHCP_MSG_OFFER
;
704 /* This is a DHCPREQUEST. If the client has requested an IP that
705 * does not match the offered IP address, reply with a NAK. The
706 * requested IP address may be supplied either via Requested IP Address
707 * (opt 50) or via ciaddr, depending on the client's state.
709 msg_type
= DHCP_MSG_ACK
;
710 if (request_ip
!= *offer_ip
) {
711 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
712 VLOG_WARN_RL(&rl
, "DHCPREQUEST requested IP "IP_FMT
" does not "
713 "match offer "IP_FMT
, IP_ARGS(request_ip
),
715 msg_type
= DHCP_MSG_NAK
;
719 /* Frame the DHCP reply packet
720 * Total DHCP options length will be options stored in the userdata +
721 * 16 bytes. Note that the DHCP options stored in userdata are not included
722 * in DHCPNAK messages.
724 * --------------------------------------------------------------
725 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
726 * --------------------------------------------------------------
727 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
728 * --------------------------------------------------------------
730 uint16_t new_l4_size
= UDP_HEADER_LEN
+ DHCP_HEADER_LEN
+ 16;
731 if (msg_type
!= DHCP_MSG_NAK
) {
732 new_l4_size
+= userdata
->size
;
734 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
736 struct dp_packet pkt_out
;
737 dp_packet_init(&pkt_out
, new_packet_size
);
738 dp_packet_clear(&pkt_out
);
739 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
740 pkt_out_ptr
= &pkt_out
;
742 /* Copy the L2 and L3 headers from the pkt_in as they would remain same*/
744 &pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
), pkt_in
->l4_ofs
);
746 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
747 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
748 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
749 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
751 struct udp_header
*udp
= dp_packet_put(
752 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
754 struct dhcp_header
*dhcp_data
= dp_packet_put(
755 &pkt_out
, dp_packet_pull(pkt_in
, DHCP_HEADER_LEN
), DHCP_HEADER_LEN
);
756 dhcp_data
->op
= DHCP_OP_REPLY
;
757 dhcp_data
->yiaddr
= (msg_type
== DHCP_MSG_NAK
) ? 0 : *offer_ip
;
758 dp_packet_put(&pkt_out
, &magic_cookie
, sizeof(ovs_be32
));
760 uint16_t out_dhcp_opts_size
= 12;
761 if (msg_type
!= DHCP_MSG_NAK
) {
762 out_dhcp_opts_size
+= userdata
->size
;
764 uint8_t *out_dhcp_opts
= dp_packet_put_zeros(&pkt_out
,
766 /* DHCP option - type */
767 out_dhcp_opts
[0] = DHCP_OPT_MSG_TYPE
;
768 out_dhcp_opts
[1] = 1;
769 out_dhcp_opts
[2] = msg_type
;
772 if (msg_type
!= DHCP_MSG_NAK
) {
773 memcpy(out_dhcp_opts
, userdata
->data
, userdata
->size
);
774 out_dhcp_opts
+= userdata
->size
;
780 out_dhcp_opts
[0] = DHCP_OPT_END
;
782 udp
->udp_len
= htons(new_l4_size
);
784 struct ip_header
*out_ip
= dp_packet_l3(&pkt_out
);
785 out_ip
->ip_tot_len
= htons(pkt_out
.l4_ofs
- pkt_out
.l3_ofs
+ new_l4_size
);
787 /* Checksum needs to be initialized to zero. */
789 out_ip
->ip_csum
= csum(out_ip
, sizeof *out_ip
);
791 pin
->packet
= dp_packet_data(&pkt_out
);
792 pin
->packet_len
= dp_packet_size(&pkt_out
);
794 /* Log the response. */
795 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(20, 40);
796 const struct eth_header
*l2
= dp_packet_eth(&pkt_out
);
797 VLOG_INFO_RL(&rl
, "DHCP%s "ETH_ADDR_FMT
" "IP_FMT
"",
798 msg_type
== DHCP_MSG_OFFER
? "OFFER" :
799 (msg_type
== DHCP_MSG_ACK
? "ACK": "NAK"),
800 ETH_ADDR_ARGS(l2
->eth_src
), IP_ARGS(*offer_ip
));
805 union mf_subvalue sv
;
807 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
809 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
811 dp_packet_uninit(pkt_out_ptr
);
816 compose_out_dhcpv6_opts(struct ofpbuf
*userdata
,
817 struct ofpbuf
*out_dhcpv6_opts
, ovs_be32 iaid
)
819 while (userdata
->size
) {
820 struct dhcp_opt6_header
*userdata_opt
= ofpbuf_try_pull(
821 userdata
, sizeof *userdata_opt
);
826 size_t size
= ntohs(userdata_opt
->size
);
827 uint8_t *userdata_opt_data
= ofpbuf_try_pull(userdata
, size
);
828 if (!userdata_opt_data
) {
832 switch (ntohs(userdata_opt
->opt_code
)) {
833 case DHCPV6_OPT_SERVER_ID_CODE
:
835 /* The Server Identifier option carries a DUID
836 * identifying a server between a client and a server.
837 * See RFC 3315 Sec 9 and Sec 22.3.
839 * We use DUID Based on Link-layer Address [DUID-LL].
842 struct dhcpv6_opt_server_id
*opt_server_id
= ofpbuf_put_zeros(
843 out_dhcpv6_opts
, sizeof *opt_server_id
);
845 opt_server_id
->opt
.code
= htons(DHCPV6_OPT_SERVER_ID_CODE
);
846 opt_server_id
->opt
.len
= htons(size
+ 4);
847 opt_server_id
->duid_type
= htons(DHCPV6_DUID_LL
);
848 opt_server_id
->hw_type
= htons(DHCPV6_HW_TYPE_ETH
);
849 memcpy(&opt_server_id
->mac
, userdata_opt_data
,
850 sizeof(struct eth_addr
));
854 case DHCPV6_OPT_IA_ADDR_CODE
:
856 if (size
!= sizeof(struct in6_addr
)) {
861 /* If iaid is None, it means its an DHCPv6 information request.
862 * Don't put IA_NA option in the response. */
865 /* IA Address option is used to specify IPv6 addresses associated
866 * with an IA_NA or IA_TA. The IA Address option must be
867 * encapsulated in the Options field of an IA_NA or IA_TA option.
869 * We will encapsulate the IA Address within the IA_NA option.
870 * Please see RFC 3315 section 22.5 and 22.6
872 struct dhcpv6_opt_ia_na
*opt_ia_na
= ofpbuf_put_zeros(
873 out_dhcpv6_opts
, sizeof *opt_ia_na
);
874 opt_ia_na
->opt
.code
= htons(DHCPV6_OPT_IA_NA_CODE
);
875 /* IA_NA length (in bytes)-
879 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
881 opt_ia_na
->opt
.len
= htons(12 + sizeof(struct dhcpv6_opt_ia_addr
));
882 opt_ia_na
->iaid
= iaid
;
883 /* Set the lifetime of the address(es) to infinity */
884 opt_ia_na
->t1
= OVS_BE32_MAX
;
885 opt_ia_na
->t2
= OVS_BE32_MAX
;
887 struct dhcpv6_opt_ia_addr
*opt_ia_addr
= ofpbuf_put_zeros(
888 out_dhcpv6_opts
, sizeof *opt_ia_addr
);
889 opt_ia_addr
->opt
.code
= htons(DHCPV6_OPT_IA_ADDR_CODE
);
890 opt_ia_addr
->opt
.len
= htons(size
+ 8);
891 memcpy(opt_ia_addr
->ipv6
.s6_addr
, userdata_opt_data
, size
);
892 opt_ia_addr
->t1
= OVS_BE32_MAX
;
893 opt_ia_addr
->t2
= OVS_BE32_MAX
;
897 case DHCPV6_OPT_DNS_SERVER_CODE
:
899 struct dhcpv6_opt_header
*opt_dns
= ofpbuf_put_zeros(
900 out_dhcpv6_opts
, sizeof *opt_dns
);
901 opt_dns
->code
= htons(DHCPV6_OPT_DNS_SERVER_CODE
);
902 opt_dns
->len
= htons(size
);
903 ofpbuf_put(out_dhcpv6_opts
, userdata_opt_data
, size
);
907 case DHCPV6_OPT_DOMAIN_SEARCH_CODE
:
909 struct dhcpv6_opt_header
*opt_dsl
= ofpbuf_put_zeros(
910 out_dhcpv6_opts
, sizeof *opt_dsl
);
911 opt_dsl
->code
= htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE
);
912 opt_dsl
->len
= htons(size
+ 2);
913 uint8_t *data
= ofpbuf_put_zeros(out_dhcpv6_opts
, size
+ 2);
915 memcpy(data
+ 1, userdata_opt_data
, size
);
927 pinctrl_handle_put_dhcpv6_opts(
928 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
929 struct ofpbuf
*userdata
, struct ofpbuf
*continuation OVS_UNUSED
)
931 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
932 enum ofp_version version
= rconn_get_version(swconn
);
933 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
934 struct dp_packet
*pkt_out_ptr
= NULL
;
935 uint32_t success
= 0;
937 /* Parse result field. */
938 const struct mf_field
*f
;
939 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
941 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
945 /* Parse result offset. */
946 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
948 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
952 /* Check that the result is valid and writable. */
953 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
954 ofperr
= mf_check_dst(&dst
, NULL
);
956 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
960 if (!userdata
->size
) {
961 VLOG_WARN_RL(&rl
, "DHCPv6 options not present in the userdata");
965 struct udp_header
*in_udp
= dp_packet_l4(pkt_in
);
966 const uint8_t *in_dhcpv6_data
= dp_packet_get_udp_payload(pkt_in
);
967 if (!in_udp
|| !in_dhcpv6_data
) {
968 VLOG_WARN_RL(&rl
, "truncated dhcpv6 packet");
972 uint8_t out_dhcpv6_msg_type
;
973 uint8_t in_dhcpv6_msg_type
= *in_dhcpv6_data
;
974 switch (in_dhcpv6_msg_type
) {
975 case DHCPV6_MSG_TYPE_SOLICIT
:
976 out_dhcpv6_msg_type
= DHCPV6_MSG_TYPE_ADVT
;
979 case DHCPV6_MSG_TYPE_REQUEST
:
980 case DHCPV6_MSG_TYPE_CONFIRM
:
981 case DHCPV6_MSG_TYPE_DECLINE
:
982 case DHCPV6_MSG_TYPE_INFO_REQ
:
983 out_dhcpv6_msg_type
= DHCPV6_MSG_TYPE_REPLY
;
987 /* Invalid or unsupported DHCPv6 message type */
991 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes). */
993 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
994 * solicit/request/confirm packet and copy the same IAID in the Server's
996 * DHCPv6 information packet (for stateless request will not have IA-NA
997 * option. So we don't need to copy that in the Server's response.
1000 struct dhcpv6_opt_header
const *in_opt_client_id
= NULL
;
1001 size_t udp_len
= ntohs(in_udp
->udp_len
);
1002 size_t l4_len
= dp_packet_l4_size(pkt_in
);
1003 uint8_t *end
= (uint8_t *)in_udp
+ MIN(udp_len
, l4_len
);
1004 while (in_dhcpv6_data
< end
) {
1005 struct dhcpv6_opt_header
const *in_opt
=
1006 (struct dhcpv6_opt_header
*)in_dhcpv6_data
;
1007 switch(ntohs(in_opt
->code
)) {
1008 case DHCPV6_OPT_IA_NA_CODE
:
1010 struct dhcpv6_opt_ia_na
*opt_ia_na
= (
1011 struct dhcpv6_opt_ia_na
*)in_opt
;
1012 iaid
= opt_ia_na
->iaid
;
1016 case DHCPV6_OPT_CLIENT_ID_CODE
:
1017 in_opt_client_id
= in_opt
;
1023 in_dhcpv6_data
+= sizeof *in_opt
+ ntohs(in_opt
->len
);
1026 if (!in_opt_client_id
) {
1027 VLOG_WARN_RL(&rl
, "DHCPv6 option - Client id not present in the "
1032 if (!iaid
&& in_dhcpv6_msg_type
!= DHCPV6_MSG_TYPE_INFO_REQ
) {
1033 VLOG_WARN_RL(&rl
, "DHCPv6 option - IA NA not present in the "
1038 uint64_t out_ofpacts_dhcpv6_opts_stub
[256 / 8];
1039 struct ofpbuf out_dhcpv6_opts
=
1040 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub
);
1042 if (!compose_out_dhcpv6_opts(userdata
, &out_dhcpv6_opts
, iaid
)) {
1043 VLOG_WARN_RL(&rl
, "Invalid userdata");
1047 uint16_t new_l4_size
1048 = (UDP_HEADER_LEN
+ 4 + sizeof *in_opt_client_id
+
1049 ntohs(in_opt_client_id
->len
) + out_dhcpv6_opts
.size
);
1050 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
1052 struct dp_packet pkt_out
;
1053 dp_packet_init(&pkt_out
, new_packet_size
);
1054 dp_packet_clear(&pkt_out
);
1055 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
1056 pkt_out_ptr
= &pkt_out
;
1058 /* Copy L2 and L3 headers from pkt_in. */
1059 dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
),
1062 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
1063 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
1064 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
1065 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
1067 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
1068 * Need to preserve the transaction id in the DHCPv6 reply packet. */
1069 struct udp_header
*out_udp
= dp_packet_put(
1070 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
1071 uint8_t *out_dhcpv6
= dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, 4), 4);
1073 /* Set the proper DHCPv6 message type. */
1074 *out_dhcpv6
= out_dhcpv6_msg_type
;
1076 /* Copy the Client Identifier. */
1077 dp_packet_put(&pkt_out
, in_opt_client_id
,
1078 sizeof *in_opt_client_id
+ ntohs(in_opt_client_id
->len
));
1080 /* Copy the DHCPv6 Options. */
1081 dp_packet_put(&pkt_out
, out_dhcpv6_opts
.data
, out_dhcpv6_opts
.size
);
1082 out_udp
->udp_len
= htons(new_l4_size
);
1083 out_udp
->udp_csum
= 0;
1085 struct ovs_16aligned_ip6_hdr
*out_ip6
= dp_packet_l3(&pkt_out
);
1086 out_ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
= out_udp
->udp_len
;
1089 csum
= packet_csum_pseudoheader6(dp_packet_l3(&pkt_out
));
1090 csum
= csum_continue(csum
, out_udp
, dp_packet_size(&pkt_out
) -
1091 ((const unsigned char *)out_udp
-
1092 (const unsigned char *)dp_packet_eth(&pkt_out
)));
1093 out_udp
->udp_csum
= csum_finish(csum
);
1094 if (!out_udp
->udp_csum
) {
1095 out_udp
->udp_csum
= htons(0xffff);
1098 pin
->packet
= dp_packet_data(&pkt_out
);
1099 pin
->packet_len
= dp_packet_size(&pkt_out
);
1100 ofpbuf_uninit(&out_dhcpv6_opts
);
1104 union mf_subvalue sv
;
1105 sv
.u8_val
= success
;
1106 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
1108 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
1109 dp_packet_uninit(pkt_out_ptr
);
1113 put_be16(struct ofpbuf
*buf
, ovs_be16 x
)
1115 ofpbuf_put(buf
, &x
, sizeof x
);
1119 put_be32(struct ofpbuf
*buf
, ovs_be32 x
)
1121 ofpbuf_put(buf
, &x
, sizeof x
);
1125 pinctrl_handle_dns_lookup(
1126 const struct sbrec_dns_table
*dns_table
,
1127 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
1128 struct ofpbuf
*userdata
, struct ofpbuf
*continuation
)
1130 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1131 enum ofp_version version
= rconn_get_version(swconn
);
1132 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
1133 struct dp_packet
*pkt_out_ptr
= NULL
;
1134 uint32_t success
= 0;
1136 /* Parse result field. */
1137 const struct mf_field
*f
;
1138 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
1140 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
1144 /* Parse result offset. */
1145 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
1147 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
1151 /* Check that the result is valid and writable. */
1152 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
1153 ofperr
= mf_check_dst(&dst
, NULL
);
1155 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
1159 /* Extract the DNS header */
1160 struct dns_header
const *in_dns_header
= dp_packet_get_udp_payload(pkt_in
);
1161 if (!in_dns_header
) {
1162 VLOG_WARN_RL(&rl
, "truncated dns packet");
1166 /* Check if it is DNS request or not */
1167 if (in_dns_header
->lo_flag
& 0x80) {
1168 /* It's a DNS response packet which we are not interested in */
1172 /* Check if at least one query request is present */
1173 if (!in_dns_header
->qdcount
) {
1177 struct udp_header
*in_udp
= dp_packet_l4(pkt_in
);
1178 size_t udp_len
= ntohs(in_udp
->udp_len
);
1179 size_t l4_len
= dp_packet_l4_size(pkt_in
);
1180 uint8_t *end
= (uint8_t *)in_udp
+ MIN(udp_len
, l4_len
);
1181 uint8_t *in_dns_data
= (uint8_t *)(in_dns_header
+ 1);
1182 uint8_t *in_queryname
= in_dns_data
;
1184 struct ds query_name
;
1185 ds_init(&query_name
);
1186 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
1187 * encoded as (in hex) - 03 77 77 77 03 6f 76 63 03 6f 72 67 00.
1189 while ((in_dns_data
+ idx
) < end
&& in_dns_data
[idx
]) {
1190 uint8_t label_len
= in_dns_data
[idx
++];
1191 if (in_dns_data
+ idx
+ label_len
> end
) {
1192 ds_destroy(&query_name
);
1195 ds_put_buffer(&query_name
, (const char *) in_dns_data
+ idx
, label_len
);
1197 ds_put_char(&query_name
, '.');
1201 ds_chomp(&query_name
, '.');
1204 /* Query should have TYPE and CLASS fields */
1205 if (in_dns_data
+ (2 * sizeof(ovs_be16
)) > end
) {
1206 ds_destroy(&query_name
);
1210 uint16_t query_type
= ntohs(*ALIGNED_CAST(const ovs_be16
*, in_dns_data
));
1211 /* Supported query types - A, AAAA and ANY */
1212 if (!(query_type
== DNS_QUERY_TYPE_A
|| query_type
== DNS_QUERY_TYPE_AAAA
1213 || query_type
== DNS_QUERY_TYPE_ANY
)) {
1214 ds_destroy(&query_name
);
1218 uint64_t dp_key
= ntohll(pin
->flow_metadata
.flow
.metadata
);
1219 const struct sbrec_dns
*sbrec_dns
;
1220 const char *answer_ips
= NULL
;
1221 SBREC_DNS_TABLE_FOR_EACH (sbrec_dns
, dns_table
) {
1222 for (size_t i
= 0; i
< sbrec_dns
->n_datapaths
; i
++) {
1223 if (sbrec_dns
->datapaths
[i
]->tunnel_key
== dp_key
) {
1224 answer_ips
= smap_get(&sbrec_dns
->records
,
1225 ds_cstr(&query_name
));
1237 ds_destroy(&query_name
);
1242 struct lport_addresses ip_addrs
;
1243 if (!extract_ip_addresses(answer_ips
, &ip_addrs
)) {
1247 uint16_t ancount
= 0;
1248 uint64_t dns_ans_stub
[128 / 8];
1249 struct ofpbuf dns_answer
= OFPBUF_STUB_INITIALIZER(dns_ans_stub
);
1251 if (query_type
== DNS_QUERY_TYPE_A
|| query_type
== DNS_QUERY_TYPE_ANY
) {
1252 for (size_t i
= 0; i
< ip_addrs
.n_ipv4_addrs
; i
++) {
1253 /* Copy the answer section */
1254 /* Format of the answer section is
1255 * - NAME -> The domain name
1256 * - TYPE -> 2 octets containing one of the RR type codes
1257 * - CLASS -> 2 octets which specify the class of the data
1258 * in the RDATA field.
1259 * - TTL -> 32 bit unsigned int specifying the time
1260 * interval (in secs) that the resource record
1261 * may be cached before it should be discarded.
1262 * - RDLENGTH -> 16 bit integer specifying the length of the
1264 * - RDATA -> a variable length string of octets that
1265 * describes the resource. In our case it will
1266 * be IP address of the domain name.
1268 ofpbuf_put(&dns_answer
, in_queryname
, idx
);
1269 put_be16(&dns_answer
, htons(DNS_QUERY_TYPE_A
));
1270 put_be16(&dns_answer
, htons(DNS_CLASS_IN
));
1271 put_be32(&dns_answer
, htonl(DNS_DEFAULT_RR_TTL
));
1272 put_be16(&dns_answer
, htons(sizeof(ovs_be32
)));
1273 put_be32(&dns_answer
, ip_addrs
.ipv4_addrs
[i
].addr
);
1278 if (query_type
== DNS_QUERY_TYPE_AAAA
||
1279 query_type
== DNS_QUERY_TYPE_ANY
) {
1280 for (size_t i
= 0; i
< ip_addrs
.n_ipv6_addrs
; i
++) {
1281 ofpbuf_put(&dns_answer
, in_queryname
, idx
);
1282 put_be16(&dns_answer
, htons(DNS_QUERY_TYPE_AAAA
));
1283 put_be16(&dns_answer
, htons(DNS_CLASS_IN
));
1284 put_be32(&dns_answer
, htonl(DNS_DEFAULT_RR_TTL
));
1285 const struct in6_addr
*ip6
= &ip_addrs
.ipv6_addrs
[i
].addr
;
1286 put_be16(&dns_answer
, htons(sizeof *ip6
));
1287 ofpbuf_put(&dns_answer
, ip6
, sizeof *ip6
);
1292 destroy_lport_addresses(&ip_addrs
);
1295 ofpbuf_uninit(&dns_answer
);
1299 uint16_t new_l4_size
= ntohs(in_udp
->udp_len
) + dns_answer
.size
;
1300 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
1301 struct dp_packet pkt_out
;
1302 dp_packet_init(&pkt_out
, new_packet_size
);
1303 dp_packet_clear(&pkt_out
);
1304 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
1305 pkt_out_ptr
= &pkt_out
;
1307 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1309 &pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
), pkt_in
->l4_ofs
);
1311 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
1312 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
1313 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
1314 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
1316 struct udp_header
*out_udp
= dp_packet_put(
1317 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
1319 /* Copy the DNS header. */
1320 struct dns_header
*out_dns_header
= dp_packet_put(
1321 &pkt_out
, dp_packet_pull(pkt_in
, sizeof *out_dns_header
),
1322 sizeof *out_dns_header
);
1324 /* Set the response bit to 1 in the flags. */
1325 out_dns_header
->lo_flag
|= 0x80;
1327 /* Set the answer RR. */
1328 out_dns_header
->ancount
= htons(ancount
);
1330 /* Copy the Query section. */
1331 dp_packet_put(&pkt_out
, dp_packet_data(pkt_in
), dp_packet_size(pkt_in
));
1333 /* Copy the answer sections. */
1334 dp_packet_put(&pkt_out
, dns_answer
.data
, dns_answer
.size
);
1335 ofpbuf_uninit(&dns_answer
);
1337 out_udp
->udp_len
= htons(new_l4_size
);
1338 out_udp
->udp_csum
= 0;
1340 struct eth_header
*eth
= dp_packet_data(&pkt_out
);
1341 if (eth
->eth_type
== htons(ETH_TYPE_IP
)) {
1342 struct ip_header
*out_ip
= dp_packet_l3(&pkt_out
);
1343 out_ip
->ip_tot_len
= htons(pkt_out
.l4_ofs
- pkt_out
.l3_ofs
1345 /* Checksum needs to be initialized to zero. */
1346 out_ip
->ip_csum
= 0;
1347 out_ip
->ip_csum
= csum(out_ip
, sizeof *out_ip
);
1349 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(&pkt_out
);
1350 nh
->ip6_plen
= htons(new_l4_size
);
1352 /* IPv6 needs UDP checksum calculated */
1354 csum
= packet_csum_pseudoheader6(nh
);
1355 csum
= csum_continue(csum
, out_udp
, dp_packet_size(&pkt_out
) -
1356 ((const unsigned char *)out_udp
-
1357 (const unsigned char *)eth
));
1358 out_udp
->udp_csum
= csum_finish(csum
);
1359 if (!out_udp
->udp_csum
) {
1360 out_udp
->udp_csum
= htons(0xffff);
1364 pin
->packet
= dp_packet_data(&pkt_out
);
1365 pin
->packet_len
= dp_packet_size(&pkt_out
);
1370 union mf_subvalue sv
;
1371 sv
.u8_val
= success
;
1372 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
1374 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
1375 dp_packet_uninit(pkt_out_ptr
);
1379 process_packet_in(const struct ofp_header
*msg
,
1380 const struct sbrec_dns_table
*dns_table
)
1382 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1384 struct ofputil_packet_in pin
;
1385 struct ofpbuf continuation
;
1386 enum ofperr error
= ofputil_decode_packet_in(msg
, true, NULL
, NULL
, &pin
,
1387 NULL
, NULL
, &continuation
);
1390 VLOG_WARN_RL(&rl
, "error decoding packet-in: %s",
1391 ofperr_to_string(error
));
1394 if (pin
.reason
!= OFPR_ACTION
) {
1398 struct ofpbuf userdata
= ofpbuf_const_initializer(pin
.userdata
,
1400 const struct action_header
*ah
= ofpbuf_pull(&userdata
, sizeof *ah
);
1402 VLOG_WARN_RL(&rl
, "packet-in userdata lacks action header");
1406 struct dp_packet packet
;
1407 dp_packet_use_const(&packet
, pin
.packet
, pin
.packet_len
);
1408 struct flow headers
;
1409 flow_extract(&packet
, &headers
);
1411 switch (ntohl(ah
->opcode
)) {
1412 case ACTION_OPCODE_ARP
:
1413 pinctrl_handle_arp(&headers
, &packet
, &pin
.flow_metadata
, &userdata
);
1416 case ACTION_OPCODE_PUT_ARP
:
1417 pinctrl_handle_put_mac_binding(&pin
.flow_metadata
.flow
, &headers
,
1421 case ACTION_OPCODE_PUT_DHCP_OPTS
:
1422 pinctrl_handle_put_dhcp_opts(&packet
, &pin
, &userdata
, &continuation
);
1425 case ACTION_OPCODE_ND_NA
:
1426 pinctrl_handle_nd_na(&headers
, &pin
.flow_metadata
, &userdata
, false);
1429 case ACTION_OPCODE_ND_NA_ROUTER
:
1430 pinctrl_handle_nd_na(&headers
, &pin
.flow_metadata
, &userdata
, true);
1433 case ACTION_OPCODE_PUT_ND
:
1434 pinctrl_handle_put_mac_binding(&pin
.flow_metadata
.flow
, &headers
,
1438 case ACTION_OPCODE_PUT_DHCPV6_OPTS
:
1439 pinctrl_handle_put_dhcpv6_opts(&packet
, &pin
, &userdata
,
1443 case ACTION_OPCODE_DNS_LOOKUP
:
1444 pinctrl_handle_dns_lookup(dns_table
,
1445 &packet
, &pin
, &userdata
, &continuation
);
1448 case ACTION_OPCODE_LOG
:
1449 handle_acl_log(&headers
, &userdata
);
1452 case ACTION_OPCODE_PUT_ND_RA_OPTS
:
1453 pinctrl_handle_put_nd_ra_opts(&headers
, &packet
, &pin
, &userdata
,
1457 case ACTION_OPCODE_ND_NS
:
1458 pinctrl_handle_nd_ns(&headers
, &packet
, &pin
.flow_metadata
,
1462 case ACTION_OPCODE_ICMP
:
1463 pinctrl_handle_icmp(&headers
, &packet
, &pin
.flow_metadata
,
1467 case ACTION_OPCODE_TCP_RESET
:
1468 pinctrl_handle_tcp_reset(&headers
, &packet
, &pin
.flow_metadata
,
1473 VLOG_WARN_RL(&rl
, "unrecognized packet-in opcode %"PRIu32
,
1480 pinctrl_recv(const struct sbrec_dns_table
*dns_table
,
1481 const struct ofp_header
*oh
, enum ofptype type
)
1483 if (type
== OFPTYPE_ECHO_REQUEST
) {
1484 queue_msg(ofputil_encode_echo_reply(oh
));
1485 } else if (type
== OFPTYPE_GET_CONFIG_REPLY
) {
1486 /* Enable asynchronous messages */
1487 struct ofputil_switch_config config
;
1489 ofputil_decode_get_config_reply(oh
, &config
);
1490 config
.miss_send_len
= UINT16_MAX
;
1491 set_switch_config(swconn
, &config
);
1492 } else if (type
== OFPTYPE_PACKET_IN
) {
1493 process_packet_in(oh
, dns_table
);
1495 if (VLOG_IS_DBG_ENABLED()) {
1496 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
1498 char *s
= ofp_to_string(oh
, ntohs(oh
->length
), NULL
, NULL
, 2);
1500 VLOG_DBG_RL(&rl
, "OpenFlow packet ignored: %s", s
);
/* Main periodic entry point for pinctrl.
 *
 * Makes sure 'swconn' targets the management socket of 'br_int', handles a
 * bounded batch of messages received from the switch, then runs the
 * periodic work: buffered put_mac_bindings, GARPs, IPv6 RAs and garbage
 * collection of buffered packets.
 *
 * NOTE(review): several statements of this function are not present in this
 * copy of the file; the comments below describe only what is visible. */
1507 pinctrl_run(struct ovsdb_idl_txn
*ovnsb_idl_txn
,
1508 struct ovsdb_idl_index
*sbrec_chassis_by_name
,
1509 struct ovsdb_idl_index
*sbrec_datapath_binding_by_key
,
1510 struct ovsdb_idl_index
*sbrec_port_binding_by_datapath
,
1511 struct ovsdb_idl_index
*sbrec_port_binding_by_key
,
1512 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
1513 struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip
,
1514 const struct sbrec_dns_table
*dns_table
,
1515 const struct ovsrec_bridge
*br_int
,
1516 const struct sbrec_chassis
*chassis
,
1517 const struct hmap
*local_datapaths
,
1518 const struct sset
*active_tunnels
)
/* (Re)connect only when the desired target differs from the current one. */
1520 char *target
= xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int
->name
);
1521 if (strcmp(target
, rconn_get_target(swconn
))) {
1522 VLOG_INFO("%s: connecting to switch", target
);
1523 rconn_connect(swconn
, target
, target
);
1529 if (!rconn_is_connected(swconn
)) {
/* A changed connection sequence number means 'swconn' has reconnected:
 * record the new number and discard buffered put_mac_bindings. */
1533 if (conn_seq_no
!= rconn_get_connection_seqno(swconn
)) {
1535 conn_seq_no
= rconn_get_connection_seqno(swconn
);
1536 flush_put_mac_bindings();
1539 /* Process a limited number of messages per call. */
1540 for (int i
= 0; i
< 50; i
++) {
1541 struct ofpbuf
*msg
= rconn_recv(swconn
);
1546 const struct ofp_header
*oh
= msg
->data
;
1549 ofptype_decode(&type
, oh
);
1550 pinctrl_recv(dns_table
, oh
, type
);
1554 run_put_mac_bindings(ovnsb_idl_txn
, sbrec_datapath_binding_by_key
,
1555 sbrec_port_binding_by_key
,
1556 sbrec_mac_binding_by_lport_ip
);
1557 send_garp_run(sbrec_chassis_by_name
, sbrec_port_binding_by_datapath
,
1558 sbrec_port_binding_by_name
, br_int
, chassis
,
1559 local_datapaths
, active_tunnels
);
1560 send_ipv6_ras(sbrec_port_binding_by_datapath
,
1561 sbrec_port_binding_by_name
, local_datapaths
);
1562 buffered_packets_map_gc();
1565 /* Table of ipv6_ra_state structures, keyed on logical port name */
1566 static struct shash ipv6_ras
;
1568 /* Time of the next IPv6 RA, in milliseconds (same clock as time_msec()). */
1569 static long long int send_ipv6_ra_time
;
/* Settings for the periodic IPv6 router advertisements sent for one port.
 * Filled in by ipv6_ra_update_config() from the port binding's options.
 * The intervals are in seconds; ipv6_ra_calc_next_announce() scales them to
 * milliseconds.
 * NOTE(review): ipv6_ra_update_config() also assigns 'mtu', whose
 * declaration is not present in this copy of the file. */
1571 struct ipv6_ra_config
{
1572 time_t min_interval
;
1573 time_t max_interval
;
1574 struct eth_addr eth_src
;
1575 struct eth_addr eth_dst
;
1576 struct in6_addr ipv6_src
;
1577 struct in6_addr ipv6_dst
;
1579 uint8_t mo_flags
; /* Managed/Other flags for RAs */
1580 uint8_t la_flags
; /* On-link/autonomous flags for address prefixes */
1581 struct lport_addresses prefixes
;
/* Per-port RA state, stored in 'ipv6_ras' keyed on logical port name.
 * NOTE(review): send_ipv6_ras() also uses 'port_key', 'metadata' and
 * 'delete_me', whose declarations are not present in this copy. */
1584 struct ipv6_ra_state
{
/* Time of the next RA, in ms; compared against time_msec(). */
1585 long long int next_announce
;
/* Released by ipv6_ra_delete(), and replaced when send_ipv6_ras()
 * refreshes it. */
1586 struct ipv6_ra_config
*config
;
1595 shash_init(&ipv6_ras
);
1596 send_ipv6_ra_time
= LLONG_MAX
;
1600 ipv6_ra_config_delete(struct ipv6_ra_config
*config
)
1603 destroy_lport_addresses(&config
->prefixes
);
1609 ipv6_ra_delete(struct ipv6_ra_state
*ra
)
1612 ipv6_ra_config_delete(ra
->config
);
1618 destroy_ipv6_ras(void)
1620 struct shash_node
*iter
, *next
;
1621 SHASH_FOR_EACH_SAFE (iter
, next
, &ipv6_ras
) {
1622 struct ipv6_ra_state
*ra
= iter
->data
;
1624 shash_delete(&ipv6_ras
, iter
);
1626 shash_destroy(&ipv6_ras
);
1629 static struct ipv6_ra_config
*
1630 ipv6_ra_update_config(const struct sbrec_port_binding
*pb
)
1632 struct ipv6_ra_config
*config
;
1634 config
= xzalloc(sizeof *config
);
1636 config
->max_interval
= smap_get_int(&pb
->options
, "ipv6_ra_max_interval",
1637 ND_RA_MAX_INTERVAL_DEFAULT
);
1638 config
->min_interval
= smap_get_int(&pb
->options
, "ipv6_ra_min_interval",
1639 nd_ra_min_interval_default(config
->max_interval
));
1640 config
->mtu
= smap_get_int(&pb
->options
, "ipv6_ra_mtu", ND_MTU_DEFAULT
);
1641 config
->la_flags
= ND_PREFIX_ON_LINK
;
1643 const char *address_mode
= smap_get(&pb
->options
, "ipv6_ra_address_mode");
1644 if (!address_mode
) {
1645 VLOG_WARN("No address mode specified");
1648 if (!strcmp(address_mode
, "dhcpv6_stateless")) {
1649 config
->mo_flags
= IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG
;
1650 } else if (!strcmp(address_mode
, "dhcpv6_stateful")) {
1651 config
->mo_flags
= IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG
;
1652 } else if (!strcmp(address_mode
, "slaac")) {
1653 config
->la_flags
|= ND_PREFIX_AUTONOMOUS_ADDRESS
;
1655 VLOG_WARN("Invalid address mode %s", address_mode
);
1659 const char *prefixes
= smap_get(&pb
->options
, "ipv6_ra_prefixes");
1660 if (prefixes
&& !extract_ip_addresses(prefixes
, &config
->prefixes
)) {
1661 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes
);
1665 /* All nodes multicast addresses */
1666 config
->eth_dst
= (struct eth_addr
) ETH_ADDR_C(33,33,00,00,00,01);
1667 ipv6_parse("ff02::1", &config
->ipv6_dst
);
1669 const char *eth_addr
= smap_get(&pb
->options
, "ipv6_ra_src_eth");
1670 if (!eth_addr
|| !eth_addr_from_string(eth_addr
, &config
->eth_src
)) {
1671 VLOG_WARN("Invalid ethernet source %s", eth_addr
);
1674 const char *ip_addr
= smap_get(&pb
->options
, "ipv6_ra_src_addr");
1675 if (!ip_addr
|| !ipv6_parse(ip_addr
, &config
->ipv6_src
)) {
1676 VLOG_WARN("Invalid IP source %s", ip_addr
);
1683 ipv6_ra_config_delete(config
);
/* Returns the time, on the time_msec() clock, at which the next RA should be
 * sent: now, plus 'min_interval' seconds, plus a random delay below the
 * difference between 'max_interval' and 'min_interval'.
 *
 * random_range() must be given a positive bound that fits in an int, so the
 * random part is left out when 'max_interval' does not exceed
 * 'min_interval', and the bound is capped at INT_MAX. */
static long long int
ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
{
    long long int min_interval_ms = min_interval * 1000LL;
    long long int max_interval_ms = max_interval * 1000LL;
    long long int spread_ms = max_interval_ms - min_interval_ms;
    long long int jitter_ms = 0;

    if (spread_ms > 0) {
        int bound = spread_ms > INT_MAX ? INT_MAX : (int) spread_ms;

        jitter_ms = random_range(bound);
    }

    return time_msec() + min_interval_ms + jitter_ms;
}
1698 put_load(uint64_t value
, enum mf_field_id dst
, int ofs
, int n_bits
,
1699 struct ofpbuf
*ofpacts
)
1701 struct ofpact_set_field
*sf
= ofpact_put_set_field(ofpacts
,
1702 mf_from_id(dst
), NULL
,
1704 ovs_be64 n_value
= htonll(value
);
1705 bitwise_copy(&n_value
, 8, 0, sf
->value
, sf
->field
->n_bytes
, ofs
, n_bits
);
1706 bitwise_one(ofpact_set_field_mask(sf
), sf
->field
->n_bytes
, ofs
, n_bits
);
1709 static long long int
1710 ipv6_ra_send(struct ipv6_ra_state
*ra
)
1712 if (time_msec() < ra
->next_announce
) {
1713 return ra
->next_announce
;
1716 uint64_t packet_stub
[128 / 8];
1717 struct dp_packet packet
;
1718 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
1719 compose_nd_ra(&packet
, ra
->config
->eth_src
, ra
->config
->eth_dst
,
1720 &ra
->config
->ipv6_src
, &ra
->config
->ipv6_dst
,
1721 255, ra
->config
->mo_flags
, htons(IPV6_ND_RA_LIFETIME
), 0, 0,
1724 for (int i
= 0; i
< ra
->config
->prefixes
.n_ipv6_addrs
; i
++) {
1726 memcpy(&addr
, &ra
->config
->prefixes
.ipv6_addrs
[i
].addr
, sizeof addr
);
1727 packet_put_ra_prefix_opt(&packet
,
1728 ra
->config
->prefixes
.ipv6_addrs
[i
].plen
,
1729 ra
->config
->la_flags
, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME
),
1730 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME
), addr
);
1733 uint64_t ofpacts_stub
[4096 / 8];
1734 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
1736 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1737 uint32_t dp_key
= ra
->metadata
;
1738 uint32_t port_key
= ra
->port_key
;
1739 put_load(dp_key
, MFF_LOG_DATAPATH
, 0, 64, &ofpacts
);
1740 put_load(port_key
, MFF_LOG_INPORT
, 0, 32, &ofpacts
);
1741 put_load(1, MFF_LOG_FLAGS
, MLF_LOCAL_ONLY_BIT
, 1, &ofpacts
);
1742 struct ofpact_resubmit
*resubmit
= ofpact_put_RESUBMIT(&ofpacts
);
1743 resubmit
->in_port
= OFPP_CONTROLLER
;
1744 resubmit
->table_id
= OFTABLE_LOG_INGRESS_PIPELINE
;
1746 struct ofputil_packet_out po
= {
1747 .packet
= dp_packet_data(&packet
),
1748 .packet_len
= dp_packet_size(&packet
),
1749 .buffer_id
= UINT32_MAX
,
1750 .ofpacts
= ofpacts
.data
,
1751 .ofpacts_len
= ofpacts
.size
,
1754 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
1755 enum ofp_version version
= rconn_get_version(swconn
);
1756 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
1757 queue_msg(ofputil_encode_packet_out(&po
, proto
));
1758 dp_packet_uninit(&packet
);
1759 ofpbuf_uninit(&ofpacts
);
1761 ra
->next_announce
= ipv6_ra_calc_next_announce(ra
->config
->min_interval
,
1762 ra
->config
->max_interval
);
1764 return ra
->next_announce
;
1770 poll_timer_wait_until(send_ipv6_ra_time
);
1774 send_ipv6_ras(struct ovsdb_idl_index
*sbrec_port_binding_by_datapath
,
1775 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
1776 const struct hmap
*local_datapaths
)
1778 struct shash_node
*iter
, *iter_next
;
1780 send_ipv6_ra_time
= LLONG_MAX
;
1782 SHASH_FOR_EACH (iter
, &ipv6_ras
) {
1783 struct ipv6_ra_state
*ra
= iter
->data
;
1784 ra
->delete_me
= true;
1787 const struct local_datapath
*ld
;
1788 HMAP_FOR_EACH (ld
, hmap_node
, local_datapaths
) {
1789 struct sbrec_port_binding
*target
= sbrec_port_binding_index_init_row(
1790 sbrec_port_binding_by_datapath
);
1791 sbrec_port_binding_index_set_datapath(target
, ld
->datapath
);
1793 struct sbrec_port_binding
*pb
;
1794 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, target
,
1795 sbrec_port_binding_by_datapath
) {
1796 if (!smap_get_bool(&pb
->options
, "ipv6_ra_send_periodic", false)) {
1800 const char *peer_s
= smap_get(&pb
->options
, "peer");
1805 const struct sbrec_port_binding
*peer
1806 = lport_lookup_by_name(sbrec_port_binding_by_name
, peer_s
);
1811 struct ipv6_ra_config
*config
= ipv6_ra_update_config(pb
);
1816 struct ipv6_ra_state
*ra
1817 = shash_find_data(&ipv6_ras
, pb
->logical_port
);
1819 ra
= xzalloc(sizeof *ra
);
1820 ra
->config
= config
;
1821 ra
->next_announce
= ipv6_ra_calc_next_announce(
1822 ra
->config
->min_interval
,
1823 ra
->config
->max_interval
);
1824 shash_add(&ipv6_ras
, pb
->logical_port
, ra
);
1826 if (config
->min_interval
!= ra
->config
->min_interval
||
1827 config
->max_interval
!= ra
->config
->max_interval
)
1828 ra
->next_announce
= ipv6_ra_calc_next_announce(
1829 config
->min_interval
,
1830 config
->max_interval
);
1831 ipv6_ra_config_delete(ra
->config
);
1832 ra
->config
= config
;
1835 /* Peer is the logical switch port that the logical
1836 * router port is connected to. The RA is injected
1837 * into that logical switch port.
1839 ra
->port_key
= peer
->tunnel_key
;
1840 ra
->metadata
= peer
->datapath
->tunnel_key
;
1841 ra
->delete_me
= false;
1843 long long int next_ra
= ipv6_ra_send(ra
);
1844 if (send_ipv6_ra_time
> next_ra
) {
1845 send_ipv6_ra_time
= next_ra
;
1848 sbrec_port_binding_index_destroy_row(target
);
1851 /* Remove those that are no longer in the SB database */
1852 SHASH_FOR_EACH_SAFE (iter
, iter_next
, &ipv6_ras
) {
1853 struct ipv6_ra_state
*ra
= iter
->data
;
1854 if (ra
->delete_me
) {
1855 shash_delete(&ipv6_ras
, iter
);
/* Arranges for the poll loop to wake up when pinctrl has work to do:
 * buffered put_mac_bindings that can be committed through 'ovnsb_idl_txn',
 * and activity on 'swconn'.
 * NOTE(review): some statements of this function are not present in this
 * copy of the file. */
1862 pinctrl_wait(struct ovsdb_idl_txn
*ovnsb_idl_txn
)
1864 wait_put_mac_bindings(ovnsb_idl_txn
);
1865 rconn_run_wait(swconn
);
1866 rconn_recv_wait(swconn
);
/* Releases pinctrl's resources: the switch connection, buffered
 * put_mac_bindings, GARP state and the buffered-packets map.
 * NOTE(review): one statement of this function is not present in this copy
 * of the file. */
1872 pinctrl_destroy(void)
1874 rconn_destroy(swconn
);
1875 destroy_put_mac_bindings();
1876 destroy_send_garps();
1878 destroy_buffered_packets_map();
1881 /* Implementation of the "put_arp" and "put_nd" OVN actions. These
1882 * actions send a packet to ovn-controller, using the flow as an API
1883 * (see actions.h for details). This code implements the actions by
1884 * updating the MAC_Binding table in the southbound database.
1886 * This code could be a lot simpler if the database could always be updated,
1887 * but in fact we can only update it when 'ovnsb_idl_txn' is nonnull. Thus,
1888 * we buffer up a few put_mac_bindings (but we don't keep them longer
1889 * than 1 second) and apply them whenever a database transaction is
1892 /* Buffered "put_mac_binding" operation. */
/* One pending MAC binding, keyed on datapath key, port key and IP address
 * (see pinctrl_find_put_mac_binding()).
 * NOTE(review): the 'dp_key' and 'port_key' members used by that function
 * are not present in this copy of the file. */
1893 struct put_mac_binding
{
1894 struct hmap_node hmap_node
; /* In 'put_mac_bindings'. */
1896 long long int timestamp
; /* In milliseconds. */
/* IPv6 address; for ARP, the IPv4 address in IPv4-mapped form. */
1901 struct in6_addr ip_key
;
/* Ethernet source address of the packet that triggered the action. */
1904 struct eth_addr mac
;
1907 /* Contains "struct put_mac_binding"s. */
1908 static struct hmap put_mac_bindings
;
1911 init_put_mac_bindings(void)
1913 hmap_init(&put_mac_bindings
);
1917 destroy_put_mac_bindings(void)
1919 flush_put_mac_bindings();
1920 hmap_destroy(&put_mac_bindings
);
1923 static struct put_mac_binding
*
1924 pinctrl_find_put_mac_binding(uint32_t dp_key
, uint32_t port_key
,
1925 const struct in6_addr
*ip_key
, uint32_t hash
)
1927 struct put_mac_binding
*pa
;
1928 HMAP_FOR_EACH_WITH_HASH (pa
, hmap_node
, hash
, &put_mac_bindings
) {
1929 if (pa
->dp_key
== dp_key
1930 && pa
->port_key
== port_key
1931 && IN6_ARE_ADDR_EQUAL(&pa
->ip_key
, ip_key
)) {
1939 pinctrl_handle_put_mac_binding(const struct flow
*md
,
1940 const struct flow
*headers
, bool is_arp
)
1942 uint32_t dp_key
= ntohll(md
->metadata
);
1943 uint32_t port_key
= md
->regs
[MFF_LOG_INPORT
- MFF_REG0
];
1944 struct buffered_packets
*bp
;
1945 struct in6_addr ip_key
;
1948 ip_key
= in6_addr_mapped_ipv4(htonl(md
->regs
[0]));
1950 ovs_be128 ip6
= hton128(flow_get_xxreg(md
, 0));
1951 memcpy(&ip_key
, &ip6
, sizeof ip_key
);
1953 uint32_t hash
= hash_bytes(&ip_key
, sizeof ip_key
,
1954 hash_2words(dp_key
, port_key
));
1955 struct put_mac_binding
*pmb
1956 = pinctrl_find_put_mac_binding(dp_key
, port_key
, &ip_key
, hash
);
1958 if (hmap_count(&put_mac_bindings
) >= 1000) {
1959 COVERAGE_INC(pinctrl_drop_put_mac_binding
);
1963 pmb
= xmalloc(sizeof *pmb
);
1964 hmap_insert(&put_mac_bindings
, &pmb
->hmap_node
, hash
);
1965 pmb
->dp_key
= dp_key
;
1966 pmb
->port_key
= port_key
;
1967 pmb
->ip_key
= ip_key
;
1969 pmb
->timestamp
= time_msec();
1970 pmb
->mac
= headers
->dl_src
;
1972 /* send queued pkts */
1973 uint32_t bhash
= hash_bytes(&ip_key
, sizeof ip_key
, 0);
1974 bp
= pinctrl_find_buffered_packets(&ip_key
, bhash
);
1976 buffered_send_packets(bp
, &pmb
->mac
);
/* Looks up, through index 'sbrec_mac_binding_by_lport_ip', the MAC_Binding
 * row for 'logical_port' and 'ip'.  Returns the row, or whatever the index
 * lookup yields when there is no match. */
static const struct sbrec_mac_binding *
mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
                   const char *logical_port,
                   const char *ip)
{
    /* Temporary row that carries the search key. */
    struct sbrec_mac_binding *key = sbrec_mac_binding_index_init_row(
        sbrec_mac_binding_by_lport_ip);
    sbrec_mac_binding_index_set_logical_port(key, logical_port);
    sbrec_mac_binding_index_set_ip(key, ip);

    const struct sbrec_mac_binding *found
        = sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, key);

    sbrec_mac_binding_index_destroy_row(key);

    return found;
}
2000 run_put_mac_binding(struct ovsdb_idl_txn
*ovnsb_idl_txn
,
2001 struct ovsdb_idl_index
*sbrec_datapath_binding_by_key
,
2002 struct ovsdb_idl_index
*sbrec_port_binding_by_key
,
2003 struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip
,
2004 const struct put_mac_binding
*pmb
)
2006 if (time_msec() > pmb
->timestamp
+ 1000) {
2010 /* Convert logical datapath and logical port key into lport. */
2011 const struct sbrec_port_binding
*pb
= lport_lookup_by_key(
2012 sbrec_datapath_binding_by_key
, sbrec_port_binding_by_key
,
2013 pmb
->dp_key
, pmb
->port_key
);
2015 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2017 VLOG_WARN_RL(&rl
, "unknown logical port with datapath %"PRIu32
" "
2018 "and port %"PRIu32
, pmb
->dp_key
, pmb
->port_key
);
2022 /* Convert ethernet argument to string form for database. */
2023 char mac_string
[ETH_ADDR_STRLEN
+ 1];
2024 snprintf(mac_string
, sizeof mac_string
,
2025 ETH_ADDR_FMT
, ETH_ADDR_ARGS(pmb
->mac
));
2027 struct ds ip_s
= DS_EMPTY_INITIALIZER
;
2028 ipv6_format_mapped(&pmb
->ip_key
, &ip_s
);
2030 /* Update or add an IP-MAC binding for this logical port. */
2031 const struct sbrec_mac_binding
*b
=
2032 mac_binding_lookup(sbrec_mac_binding_by_lport_ip
, pb
->logical_port
,
2035 b
= sbrec_mac_binding_insert(ovnsb_idl_txn
);
2036 sbrec_mac_binding_set_logical_port(b
, pb
->logical_port
);
2037 sbrec_mac_binding_set_ip(b
, ds_cstr(&ip_s
));
2038 sbrec_mac_binding_set_mac(b
, mac_string
);
2039 sbrec_mac_binding_set_datapath(b
, pb
->datapath
);
2040 } else if (strcmp(b
->mac
, mac_string
)) {
2041 sbrec_mac_binding_set_mac(b
, mac_string
);
2047 run_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn
,
2048 struct ovsdb_idl_index
*sbrec_datapath_binding_by_key
,
2049 struct ovsdb_idl_index
*sbrec_port_binding_by_key
,
2050 struct ovsdb_idl_index
*sbrec_mac_binding_by_lport_ip
)
2052 if (!ovnsb_idl_txn
) {
2056 const struct put_mac_binding
*pmb
;
2057 HMAP_FOR_EACH (pmb
, hmap_node
, &put_mac_bindings
) {
2058 run_put_mac_binding(ovnsb_idl_txn
, sbrec_datapath_binding_by_key
,
2059 sbrec_port_binding_by_key
,
2060 sbrec_mac_binding_by_lport_ip
,
2063 flush_put_mac_bindings();
2067 wait_put_mac_bindings(struct ovsdb_idl_txn
*ovnsb_idl_txn
)
2069 if (ovnsb_idl_txn
&& !hmap_is_empty(&put_mac_bindings
)) {
2070 poll_immediate_wake();
2075 flush_put_mac_bindings(void)
2077 struct put_mac_binding
*pmb
;
2078 HMAP_FOR_EACH_POP (pmb
, hmap_node
, &put_mac_bindings
) {
2084 * Send gratuitous ARP for vif on localnet.
2086 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
2087 * the port's mac,ip mapping. On localnet, such announcements are needed for
2088 * switches and routers on the broadcast segment to update their port-mac
2092 struct eth_addr ea
; /* Ethernet address of port. */
2093 ovs_be32 ipv4
; /* Ipv4 address of port. */
2094 long long int announce_time
; /* Next announcement in ms. */
2095 int backoff
; /* Backoff for the next announcement. */
2096 uint32_t dp_key
; /* Datapath used to output this GARP. */
2097 uint32_t port_key
; /* Port to inject the GARP into. */
2100 /* Contains GARPs to be sent. */
2101 static struct shash send_garp_data
;
2103 /* Next GARP announcement in ms. */
2104 static long long int send_garp_time
;
2107 init_send_garps(void)
2109 shash_init(&send_garp_data
);
2110 send_garp_time
= LLONG_MAX
;
2114 destroy_send_garps(void)
2116 shash_destroy_free_data(&send_garp_data
);
/* Allocates a GARP entry and registers it in 'send_garp_data' under 'name'.
 * The first announcement is scheduled 1000 ms from now, and the GARP will
 * be injected on datapath 'dp_key' at port 'port_key'.
 * NOTE(review): the statements that presumably store 'ea' and 'ip' and
 * initialize the backoff are not present in this copy of the file. */
2120 add_garp(const char *name
, const struct eth_addr ea
, ovs_be32 ip
,
2121 uint32_t dp_key
, uint32_t port_key
)
2123 struct garp_data
*garp
= xmalloc(sizeof *garp
);
2126 garp
->announce_time
= time_msec() + 1000;
2128 garp
->dp_key
= dp_key
;
2129 garp
->port_key
= port_key
;
2130 shash_add(&send_garp_data
, name
, garp
);
2133 /* Add or update a vif for which GARPs need to be announced. */
2135 send_garp_update(const struct sbrec_port_binding
*binding_rec
,
2136 struct shash
*nat_addresses
)
2138 volatile struct garp_data
*garp
= NULL
;
2139 /* Update GARP for NAT IP if it exists. Consider port bindings with type
2140 * "l3gateway" for logical switch ports attached to gateway routers, and
2141 * port bindings with type "patch" for logical switch ports attached to
2142 * distributed gateway ports. */
2143 if (!strcmp(binding_rec
->type
, "l3gateway")
2144 || !strcmp(binding_rec
->type
, "patch")) {
2145 struct lport_addresses
*laddrs
= NULL
;
2146 while ((laddrs
= shash_find_and_delete(nat_addresses
,
2147 binding_rec
->logical_port
))) {
2149 for (i
= 0; i
< laddrs
->n_ipv4_addrs
; i
++) {
2150 char *name
= xasprintf("%s-%s", binding_rec
->logical_port
,
2151 laddrs
->ipv4_addrs
[i
].addr_s
);
2152 garp
= shash_find_data(&send_garp_data
, name
);
2154 garp
->dp_key
= binding_rec
->datapath
->tunnel_key
;
2155 garp
->port_key
= binding_rec
->tunnel_key
;
2157 add_garp(name
, laddrs
->ea
,
2158 laddrs
->ipv4_addrs
[i
].addr
,
2159 binding_rec
->datapath
->tunnel_key
,
2160 binding_rec
->tunnel_key
);
2164 destroy_lport_addresses(laddrs
);
2170 /* Update GARP for vif if it exists. */
2171 garp
= shash_find_data(&send_garp_data
, binding_rec
->logical_port
);
2173 garp
->dp_key
= binding_rec
->datapath
->tunnel_key
;
2174 garp
->port_key
= binding_rec
->tunnel_key
;
2178 /* Add GARP for new vif. */
2180 for (i
= 0; i
< binding_rec
->n_mac
; i
++) {
2181 struct lport_addresses laddrs
;
2182 if (!extract_lsp_addresses(binding_rec
->mac
[i
], &laddrs
)
2183 || !laddrs
.n_ipv4_addrs
) {
2187 add_garp(binding_rec
->logical_port
,
2188 laddrs
.ea
, laddrs
.ipv4_addrs
[0].addr
,
2189 binding_rec
->datapath
->tunnel_key
, binding_rec
->tunnel_key
);
2191 destroy_lport_addresses(&laddrs
);
2196 /* Remove a vif from GARP announcements. */
2198 send_garp_delete(const char *lport
)
2200 struct garp_data
*garp
= shash_find_and_delete(&send_garp_data
, lport
);
2204 static long long int
2205 send_garp(struct garp_data
*garp
, long long int current_time
)
2207 if (current_time
< garp
->announce_time
) {
2208 return garp
->announce_time
;
2211 /* Compose a GARP request packet. */
2212 uint64_t packet_stub
[128 / 8];
2213 struct dp_packet packet
;
2214 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2215 compose_arp(&packet
, ARP_OP_REQUEST
, garp
->ea
, eth_addr_zero
,
2216 true, garp
->ipv4
, garp
->ipv4
);
2218 /* Inject GARP request. */
2219 uint64_t ofpacts_stub
[4096 / 8];
2220 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
2221 enum ofp_version version
= rconn_get_version(swconn
);
2222 put_load(garp
->dp_key
, MFF_LOG_DATAPATH
, 0, 64, &ofpacts
);
2223 put_load(garp
->port_key
, MFF_LOG_INPORT
, 0, 32, &ofpacts
);
2224 struct ofpact_resubmit
*resubmit
= ofpact_put_RESUBMIT(&ofpacts
);
2225 resubmit
->in_port
= OFPP_CONTROLLER
;
2226 resubmit
->table_id
= OFTABLE_LOG_INGRESS_PIPELINE
;
2228 struct ofputil_packet_out po
= {
2229 .packet
= dp_packet_data(&packet
),
2230 .packet_len
= dp_packet_size(&packet
),
2231 .buffer_id
= UINT32_MAX
,
2232 .ofpacts
= ofpacts
.data
,
2233 .ofpacts_len
= ofpacts
.size
,
2235 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
2236 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
2237 queue_msg(ofputil_encode_packet_out(&po
, proto
));
2238 dp_packet_uninit(&packet
);
2239 ofpbuf_uninit(&ofpacts
);
2241 /* Set the next announcement. At most 5 announcements are sent for a
2243 if (garp
->backoff
< 16) {
2245 garp
->announce_time
= current_time
+ garp
->backoff
* 1000;
2247 garp
->announce_time
= LLONG_MAX
;
2249 return garp
->announce_time
;
2252 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
2254 get_localnet_vifs_l3gwports(
2255 struct ovsdb_idl_index
*sbrec_port_binding_by_datapath
,
2256 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
2257 const struct ovsrec_bridge
*br_int
,
2258 const struct sbrec_chassis
*chassis
,
2259 const struct hmap
*local_datapaths
,
2260 struct sset
*localnet_vifs
,
2261 struct sset
*local_l3gw_ports
)
2263 for (int i
= 0; i
< br_int
->n_ports
; i
++) {
2264 const struct ovsrec_port
*port_rec
= br_int
->ports
[i
];
2265 if (!strcmp(port_rec
->name
, br_int
->name
)) {
2268 const char *tunnel_id
= smap_get(&port_rec
->external_ids
,
2270 if (tunnel_id
&& strstr(tunnel_id
, chassis
->name
)) {
2273 const char *localnet
= smap_get(&port_rec
->external_ids
,
2274 "ovn-localnet-port");
2278 for (int j
= 0; j
< port_rec
->n_interfaces
; j
++) {
2279 const struct ovsrec_interface
*iface_rec
= port_rec
->interfaces
[j
];
2280 if (!iface_rec
->n_ofport
) {
2283 /* Get localnet vif. */
2284 const char *iface_id
= smap_get(&iface_rec
->external_ids
,
2289 const struct sbrec_port_binding
*pb
2290 = lport_lookup_by_name(sbrec_port_binding_by_name
, iface_id
);
2294 struct local_datapath
*ld
2295 = get_local_datapath(local_datapaths
,
2296 pb
->datapath
->tunnel_key
);
2297 if (ld
&& ld
->localnet_port
) {
2298 sset_add(localnet_vifs
, iface_id
);
2303 struct sbrec_port_binding
*target
= sbrec_port_binding_index_init_row(
2304 sbrec_port_binding_by_datapath
);
2306 const struct local_datapath
*ld
;
2307 HMAP_FOR_EACH (ld
, hmap_node
, local_datapaths
) {
2308 const struct sbrec_port_binding
*pb
;
2310 if (!ld
->localnet_port
) {
2314 /* Get l3gw ports. Consider port bindings with type "l3gateway"
2315 * that connect to gateway routers (if local), and consider port
2316 * bindings of type "patch" since they might connect to
2317 * distributed gateway ports with NAT addresses. */
2319 sbrec_port_binding_index_set_datapath(target
, ld
->datapath
);
2320 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, target
,
2321 sbrec_port_binding_by_datapath
) {
2322 if ((ld
->has_local_l3gateway
&& !strcmp(pb
->type
, "l3gateway"))
2323 || !strcmp(pb
->type
, "patch")) {
2324 sset_add(local_l3gw_ports
, pb
->logical_port
);
2328 sbrec_port_binding_index_destroy_row(target
);
2332 pinctrl_is_chassis_resident(struct ovsdb_idl_index
*sbrec_chassis_by_name
,
2333 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
2334 const struct sbrec_chassis
*chassis
,
2335 const struct sset
*active_tunnels
,
2336 const char *port_name
)
2338 const struct sbrec_port_binding
*pb
2339 = lport_lookup_by_name(sbrec_port_binding_by_name
, port_name
);
2340 if (!pb
|| !pb
->chassis
) {
2343 if (strcmp(pb
->type
, "chassisredirect")) {
2344 return pb
->chassis
== chassis
;
2346 struct ovs_list
*gateway_chassis
=
2347 gateway_chassis_get_ordered(sbrec_chassis_by_name
, pb
);
2348 bool active
= gateway_chassis_is_active(gateway_chassis
,
2351 gateway_chassis_destroy(gateway_chassis
);
2356 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2357 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2358 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2359 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2360 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2362 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2364 * The caller must call destroy_lport_addresses() and free(*lport). */
2366 extract_addresses_with_port(const char *addresses
,
2367 struct lport_addresses
*laddrs
,
2371 if (!extract_addresses(addresses
, laddrs
, &ofs
)) {
2373 } else if (ofs
>= strlen(addresses
)) {
2378 lexer_init(&lexer
, addresses
+ ofs
);
2381 if (lexer
.error
|| lexer
.token
.type
!= LEX_T_ID
2382 || !lexer_match_id(&lexer
, "is_chassis_resident")) {
2383 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2384 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in address", addresses
);
2385 lexer_destroy(&lexer
);
2389 if (!lexer_match(&lexer
, LEX_T_LPAREN
)) {
2390 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2391 VLOG_INFO_RL(&rl
, "Syntax error: expecting '(' after "
2392 "'is_chassis_resident' in address '%s'", addresses
);
2393 lexer_destroy(&lexer
);
2397 if (lexer
.token
.type
!= LEX_T_STRING
) {
2398 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2400 "Syntax error: expecting quoted string after "
2401 "'is_chassis_resident' in address '%s'", addresses
);
2402 lexer_destroy(&lexer
);
2406 *lport
= xstrdup(lexer
.token
.s
);
2409 if (!lexer_match(&lexer
, LEX_T_RPAREN
)) {
2410 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2411 VLOG_INFO_RL(&rl
, "Syntax error: expecting ')' after quoted string in "
2412 "'is_chassis_resident()' in address '%s'",
2414 lexer_destroy(&lexer
);
2418 lexer_destroy(&lexer
);
2423 consider_nat_address(struct ovsdb_idl_index
*sbrec_chassis_by_name
,
2424 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
2425 const char *nat_address
,
2426 const struct sbrec_port_binding
*pb
,
2427 struct sset
*nat_address_keys
,
2428 const struct sbrec_chassis
*chassis
,
2429 const struct sset
*active_tunnels
,
2430 struct shash
*nat_addresses
)
2432 struct lport_addresses
*laddrs
= xmalloc(sizeof *laddrs
);
2434 if (!extract_addresses_with_port(nat_address
, laddrs
, &lport
)
2435 || (!lport
&& !strcmp(pb
->type
, "patch"))
2436 || (lport
&& !pinctrl_is_chassis_resident(
2437 sbrec_chassis_by_name
, sbrec_port_binding_by_name
, chassis
,
2438 active_tunnels
, lport
))) {
2439 destroy_lport_addresses(laddrs
);
2447 for (i
= 0; i
< laddrs
->n_ipv4_addrs
; i
++) {
2448 char *name
= xasprintf("%s-%s", pb
->logical_port
,
2449 laddrs
->ipv4_addrs
[i
].addr_s
);
2450 sset_add(nat_address_keys
, name
);
2453 shash_add(nat_addresses
, pb
->logical_port
, laddrs
);
2457 get_nat_addresses_and_keys(struct ovsdb_idl_index
*sbrec_chassis_by_name
,
2458 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
2459 struct sset
*nat_address_keys
,
2460 struct sset
*local_l3gw_ports
,
2461 const struct sbrec_chassis
*chassis
,
2462 const struct sset
*active_tunnels
,
2463 struct shash
*nat_addresses
)
2465 const char *gw_port
;
2466 SSET_FOR_EACH(gw_port
, local_l3gw_ports
) {
2467 const struct sbrec_port_binding
*pb
;
2469 pb
= lport_lookup_by_name(sbrec_port_binding_by_name
, gw_port
);
2474 if (pb
->n_nat_addresses
) {
2475 for (int i
= 0; i
< pb
->n_nat_addresses
; i
++) {
2476 consider_nat_address(sbrec_chassis_by_name
,
2477 sbrec_port_binding_by_name
,
2478 pb
->nat_addresses
[i
], pb
,
2479 nat_address_keys
, chassis
,
2484 /* Continue to support options:nat-addresses for version
2486 const char *nat_addresses_options
= smap_get(&pb
->options
,
2488 if (nat_addresses_options
) {
2489 consider_nat_address(sbrec_chassis_by_name
,
2490 sbrec_port_binding_by_name
,
2491 nat_addresses_options
, pb
,
2492 nat_address_keys
, chassis
,
2501 send_garp_wait(void)
2503 poll_timer_wait_until(send_garp_time
);
2507 send_garp_run(struct ovsdb_idl_index
*sbrec_chassis_by_name
,
2508 struct ovsdb_idl_index
*sbrec_port_binding_by_datapath
,
2509 struct ovsdb_idl_index
*sbrec_port_binding_by_name
,
2510 const struct ovsrec_bridge
*br_int
,
2511 const struct sbrec_chassis
*chassis
,
2512 const struct hmap
*local_datapaths
,
2513 const struct sset
*active_tunnels
)
2515 struct sset localnet_vifs
= SSET_INITIALIZER(&localnet_vifs
);
2516 struct sset local_l3gw_ports
= SSET_INITIALIZER(&local_l3gw_ports
);
2517 struct sset nat_ip_keys
= SSET_INITIALIZER(&nat_ip_keys
);
2518 struct shash nat_addresses
;
2520 shash_init(&nat_addresses
);
2522 get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath
,
2523 sbrec_port_binding_by_name
,
2524 br_int
, chassis
, local_datapaths
,
2525 &localnet_vifs
, &local_l3gw_ports
);
2527 get_nat_addresses_and_keys(sbrec_chassis_by_name
,
2528 sbrec_port_binding_by_name
,
2529 &nat_ip_keys
, &local_l3gw_ports
,
2530 chassis
, active_tunnels
,
2532 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2533 struct shash_node
*iter
, *next
;
2534 SHASH_FOR_EACH_SAFE (iter
, next
, &send_garp_data
) {
2535 if (!sset_contains(&localnet_vifs
, iter
->name
) &&
2536 !sset_contains(&nat_ip_keys
, iter
->name
)) {
2537 send_garp_delete(iter
->name
);
2541 /* Update send_garp_data. */
2542 const char *iface_id
;
2543 SSET_FOR_EACH (iface_id
, &localnet_vifs
) {
2544 const struct sbrec_port_binding
*pb
= lport_lookup_by_name(
2545 sbrec_port_binding_by_name
, iface_id
);
2547 send_garp_update(pb
, &nat_addresses
);
2551 /* Update send_garp_data for nat-addresses. */
2552 const char *gw_port
;
2553 SSET_FOR_EACH (gw_port
, &local_l3gw_ports
) {
2554 const struct sbrec_port_binding
*pb
2555 = lport_lookup_by_name(sbrec_port_binding_by_name
, gw_port
);
2557 send_garp_update(pb
, &nat_addresses
);
2561 /* Send GARPs, and update the next announcement. */
2562 long long int current_time
= time_msec();
2563 send_garp_time
= LLONG_MAX
;
2564 SHASH_FOR_EACH (iter
, &send_garp_data
) {
2565 long long int next_announce
= send_garp(iter
->data
, current_time
);
2566 if (send_garp_time
> next_announce
) {
2567 send_garp_time
= next_announce
;
2570 sset_destroy(&localnet_vifs
);
2571 sset_destroy(&local_l3gw_ports
);
2573 SHASH_FOR_EACH_SAFE (iter
, next
, &nat_addresses
) {
2574 struct lport_addresses
*laddrs
= iter
->data
;
2575 destroy_lport_addresses(laddrs
);
2576 shash_delete(&nat_addresses
, iter
);
2579 shash_destroy(&nat_addresses
);
2581 sset_destroy(&nat_ip_keys
);
2585 reload_metadata(struct ofpbuf
*ofpacts
, const struct match
*md
)
2587 enum mf_field_id md_fields
[] = {
2588 #if FLOW_N_REGS == 16
2610 for (size_t i
= 0; i
< ARRAY_SIZE(md_fields
); i
++) {
2611 const struct mf_field
*field
= mf_from_id(md_fields
[i
]);
2612 if (!mf_is_all_wild(field
, &md
->wc
)) {
2613 union mf_value value
;
2614 mf_get_value(field
, &md
->flow
, &value
);
2615 ofpact_put_set_field(ofpacts
, field
, &value
, NULL
);
2621 pinctrl_handle_nd_na(const struct flow
*ip_flow
, const struct match
*md
,
2622 struct ofpbuf
*userdata
, bool is_router
)
2624 /* This action only works for IPv6 ND packets, and the switch should only
2625 * send us ND packets this way, but check here just to be sure. */
2626 if (!is_nd(ip_flow
, NULL
)) {
2627 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2628 VLOG_WARN_RL(&rl
, "NA action on non-ND packet");
2632 uint64_t packet_stub
[128 / 8];
2633 struct dp_packet packet
;
2634 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2636 /* These flags are not exactly correct. Look at section 7.2.4
2638 uint32_t rso_flags
= ND_RSO_SOLICITED
| ND_RSO_OVERRIDE
;
2640 rso_flags
|= ND_RSO_ROUTER
;
2642 compose_nd_na(&packet
, ip_flow
->dl_dst
, ip_flow
->dl_src
,
2643 &ip_flow
->nd_target
, &ip_flow
->ipv6_src
,
2646 /* Reload previous packet metadata and set actions from userdata. */
2647 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
2648 dp_packet_uninit(&packet
);
2652 pinctrl_handle_nd_ns(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
2653 const struct match
*md
, struct ofpbuf
*userdata
)
2655 /* This action only works for IPv6 packets. */
2656 if (get_dl_type(ip_flow
) != htons(ETH_TYPE_IPV6
)) {
2657 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2658 VLOG_WARN_RL(&rl
, "NS action on non-IPv6 packet");
2662 pinctrl_handle_buffered_packets(ip_flow
, pkt_in
, md
, false);
2664 uint64_t packet_stub
[128 / 8];
2665 struct dp_packet packet
;
2666 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2668 compose_nd_ns(&packet
, ip_flow
->dl_src
, &ip_flow
->ipv6_src
,
2669 &ip_flow
->ipv6_dst
);
2671 /* Reload previous packet metadata and set actions from userdata. */
2672 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
2673 dp_packet_uninit(&packet
);
2677 pinctrl_handle_put_nd_ra_opts(
2678 const struct flow
*in_flow
, struct dp_packet
*pkt_in
,
2679 struct ofputil_packet_in
*pin
, struct ofpbuf
*userdata
,
2680 struct ofpbuf
*continuation
)
2682 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2683 enum ofp_version version
= rconn_get_version(swconn
);
2684 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
2685 struct dp_packet
*pkt_out_ptr
= NULL
;
2686 uint32_t success
= 0;
2688 /* Parse result field. */
2689 const struct mf_field
*f
;
2690 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
2692 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
2696 /* Parse result offset. */
2697 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
2699 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
2703 /* Check that the result is valid and writable. */
2704 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
2705 ofperr
= mf_check_dst(&dst
, NULL
);
2707 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
2711 if (!userdata
->size
) {
2712 VLOG_WARN_RL(&rl
, "IPv6 ND RA options not present in the userdata");
2716 if (!is_icmpv6(in_flow
, NULL
) || in_flow
->tp_dst
!= htons(0) ||
2717 in_flow
->tp_src
!= htons(ND_ROUTER_SOLICIT
)) {
2718 VLOG_WARN_RL(&rl
, "put_nd_ra action on invalid or unsupported packet");
2722 size_t new_packet_size
= pkt_in
->l4_ofs
+ userdata
->size
;
2723 struct dp_packet pkt_out
;
2724 dp_packet_init(&pkt_out
, new_packet_size
);
2725 dp_packet_clear(&pkt_out
);
2726 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
2727 pkt_out_ptr
= &pkt_out
;
2729 /* Copy L2 and L3 headers from pkt_in. */
2730 dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
),
2733 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
2734 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
2735 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
2736 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
2738 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2739 dp_packet_put(&pkt_out
, userdata
->data
, userdata
->size
);
2741 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2742 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(&pkt_out
);
2743 nh
->ip6_plen
= htons(userdata
->size
);
2744 struct ovs_ra_msg
*ra
= dp_packet_l4(&pkt_out
);
2745 ra
->icmph
.icmp6_cksum
= 0;
2746 uint32_t icmp_csum
= packet_csum_pseudoheader6(nh
);
2747 ra
->icmph
.icmp6_cksum
= csum_finish(csum_continue(
2748 icmp_csum
, ra
, userdata
->size
));
2749 pin
->packet
= dp_packet_data(&pkt_out
);
2750 pin
->packet_len
= dp_packet_size(&pkt_out
);
2755 union mf_subvalue sv
;
2756 sv
.u8_val
= success
;
2757 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
2759 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
2760 dp_packet_uninit(pkt_out_ptr
);