1 /* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
2 * Copyright (c) 2017 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include "dp-packet.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
52 #include "vswitch-idl.h"
55 VLOG_DEFINE_THIS_MODULE(pinctrl
);
57 /* OpenFlow connection to the switch. */
58 static struct rconn
*swconn
;
60 /* Last seen sequence number for 'swconn'. When this differs from
61 * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
62 static unsigned int conn_seq_no
;
64 static void pinctrl_handle_put_mac_binding(const struct flow
*md
,
65 const struct flow
*headers
,
67 static void init_put_mac_bindings(void);
68 static void destroy_put_mac_bindings(void);
69 static void run_put_mac_bindings(struct controller_ctx
*);
70 static void wait_put_mac_bindings(struct controller_ctx
*);
71 static void flush_put_mac_bindings(void);
73 static void init_send_garps(void);
74 static void destroy_send_garps(void);
75 static void send_garp_wait(void);
76 static void send_garp_run(struct controller_ctx
*ctx
,
77 const struct ovsrec_bridge
*,
78 const struct sbrec_chassis
*,
79 const struct chassis_index
*chassis_index
,
80 struct hmap
*local_datapaths
,
81 struct sset
*active_tunnels
);
82 static void pinctrl_handle_nd_na(const struct flow
*ip_flow
,
83 const struct match
*md
,
84 struct ofpbuf
*userdata
);
85 static void reload_metadata(struct ofpbuf
*ofpacts
,
86 const struct match
*md
);
87 static void pinctrl_handle_put_nd_ra_opts(
88 const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
89 struct ofputil_packet_in
*pin
, struct ofpbuf
*userdata
,
90 struct ofpbuf
*continuation
);
91 static void pinctrl_handle_nd_ns(const struct flow
*ip_flow
,
92 const struct match
*md
,
93 struct ofpbuf
*userdata
);
94 static void init_ipv6_ras(void);
95 static void destroy_ipv6_ras(void);
96 static void ipv6_ra_wait(void);
97 static void send_ipv6_ras(const struct controller_ctx
*,
98 struct hmap
*local_datapaths
);
100 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding
);
105 swconn
= rconn_create(5, 0, DSCP_DEFAULT
, 1 << OFP13_VERSION
);
107 init_put_mac_bindings();
113 queue_msg(struct ofpbuf
*msg
)
115 const struct ofp_header
*oh
= msg
->data
;
116 ovs_be32 xid
= oh
->xid
;
118 rconn_send(swconn
, msg
, NULL
);
122 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
126 /* Fetch the switch configuration. The response later will allow us to
127 * change the miss_send_len to UINT16_MAX, so that we can enable
128 * asynchronous messages. */
129 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST
,
130 rconn_get_version(swconn
), 0));
132 /* Set a packet-in format that supports userdata. */
133 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn
),
134 OFPUTIL_PACKET_IN_NXT2
));
138 set_switch_config(struct rconn
*swconn_
,
139 const struct ofputil_switch_config
*config
)
141 enum ofp_version version
= rconn_get_version(swconn_
);
142 struct ofpbuf
*request
= ofputil_encode_set_config(config
, version
);
147 set_actions_and_enqueue_msg(const struct dp_packet
*packet
,
148 const struct match
*md
,
149 struct ofpbuf
*userdata
)
151 /* Copy metadata from 'md' into the packet-out via "set_field"
152 * actions, then add actions from 'userdata'.
154 uint64_t ofpacts_stub
[4096 / 8];
155 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
156 enum ofp_version version
= rconn_get_version(swconn
);
158 reload_metadata(&ofpacts
, md
);
159 enum ofperr error
= ofpacts_pull_openflow_actions(userdata
, userdata
->size
,
163 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
164 VLOG_WARN_RL(&rl
, "failed to parse actions from userdata (%s)",
165 ofperr_to_string(error
));
166 ofpbuf_uninit(&ofpacts
);
170 struct ofputil_packet_out po
= {
171 .packet
= dp_packet_data(packet
),
172 .packet_len
= dp_packet_size(packet
),
173 .buffer_id
= UINT32_MAX
,
174 .ofpacts
= ofpacts
.data
,
175 .ofpacts_len
= ofpacts
.size
,
177 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
178 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
179 queue_msg(ofputil_encode_packet_out(&po
, proto
));
180 ofpbuf_uninit(&ofpacts
);
184 pinctrl_handle_arp(const struct flow
*ip_flow
, const struct match
*md
,
185 struct ofpbuf
*userdata
)
187 /* This action only works for IP packets, and the switch should only send
188 * us IP packets this way, but check here just to be sure. */
189 if (ip_flow
->dl_type
!= htons(ETH_TYPE_IP
)) {
190 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
191 VLOG_WARN_RL(&rl
, "ARP action on non-IP packet (Ethertype %"PRIx16
")",
192 ntohs(ip_flow
->dl_type
));
196 /* Compose an ARP packet. */
197 uint64_t packet_stub
[128 / 8];
198 struct dp_packet packet
;
199 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
200 compose_arp__(&packet
);
202 struct eth_header
*eth
= dp_packet_eth(&packet
);
203 eth
->eth_dst
= ip_flow
->dl_dst
;
204 eth
->eth_src
= ip_flow
->dl_src
;
206 struct arp_eth_header
*arp
= dp_packet_l3(&packet
);
207 arp
->ar_op
= htons(ARP_OP_REQUEST
);
208 arp
->ar_sha
= ip_flow
->dl_src
;
209 put_16aligned_be32(&arp
->ar_spa
, ip_flow
->nw_src
);
210 arp
->ar_tha
= eth_addr_zero
;
211 put_16aligned_be32(&arp
->ar_tpa
, ip_flow
->nw_dst
);
213 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
214 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
215 ip_flow
->vlans
[0].tci
);
218 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
219 dp_packet_uninit(&packet
);
223 pinctrl_handle_icmp4(const struct flow
*ip_flow
, const struct match
*md
,
224 struct ofpbuf
*userdata
)
226 /* This action only works for IP packets, and the switch should only send
227 * us IP packets this way, but check here just to be sure. */
228 if (ip_flow
->dl_type
!= htons(ETH_TYPE_IP
)) {
229 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
231 "ICMP4 action on non-IP packet (eth_type 0x%"PRIx16
")",
232 ntohs(ip_flow
->dl_type
));
236 uint64_t packet_stub
[128 / 8];
237 struct dp_packet packet
;
239 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
240 dp_packet_clear(&packet
);
241 packet
.packet_type
= htonl(PT_ETH
);
243 struct eth_header
*eh
= dp_packet_put_zeros(&packet
, sizeof *eh
);
244 eh
->eth_dst
= ip_flow
->dl_dst
;
245 eh
->eth_src
= ip_flow
->dl_src
;
246 eh
->eth_type
= htons(ETH_TYPE_IP
);
248 struct ip_header
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
249 dp_packet_set_l3(&packet
, nh
);
250 nh
->ip_ihl_ver
= IP_IHL_VER(5, 4);
251 nh
->ip_tot_len
= htons(sizeof(struct ip_header
) +
252 sizeof(struct icmp_header
));
253 nh
->ip_proto
= IPPROTO_ICMP
;
254 nh
->ip_frag_off
= htons(IP_DF
);
255 packet_set_ipv4(&packet
, ip_flow
->nw_src
, ip_flow
->nw_dst
,
256 ip_flow
->nw_tos
, 255);
258 struct icmp_header
*ih
= dp_packet_put_zeros(&packet
, sizeof *ih
);
259 dp_packet_set_l4(&packet
, ih
);
260 packet_set_icmp(&packet
, ICMP4_DST_UNREACH
, 1);
262 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
263 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
264 ip_flow
->vlans
[0].tci
);
267 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
268 dp_packet_uninit(&packet
);
272 pinctrl_handle_tcp_reset(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
273 const struct match
*md
, struct ofpbuf
*userdata
)
275 /* This action only works for TCP segments, and the switch should only send
276 * us TCP segments this way, but check here just to be sure. */
277 if (ip_flow
->nw_proto
!= IPPROTO_TCP
) {
278 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
279 VLOG_WARN_RL(&rl
, "TCP_RESET action on non-TCP packet");
283 uint64_t packet_stub
[128 / 8];
284 struct dp_packet packet
;
286 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
287 dp_packet_clear(&packet
);
288 packet
.packet_type
= htonl(PT_ETH
);
290 struct eth_header
*eh
= dp_packet_put_zeros(&packet
, sizeof *eh
);
291 eh
->eth_dst
= ip_flow
->dl_dst
;
292 eh
->eth_src
= ip_flow
->dl_src
;
294 if (get_dl_type(ip_flow
) == htons(ETH_TYPE_IPV6
)) {
295 struct ip6_hdr
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
297 eh
->eth_type
= htons(ETH_TYPE_IPV6
);
298 dp_packet_set_l3(&packet
, nh
);
300 nh
->ip6_nxt
= IPPROTO_TCP
;
301 nh
->ip6_plen
= htons(TCP_HEADER_LEN
);
302 packet_set_ipv6(&packet
, &ip_flow
->ipv6_src
, &ip_flow
->ipv6_dst
,
303 ip_flow
->nw_tos
, ip_flow
->ipv6_label
, 255);
305 struct ip_header
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
307 eh
->eth_type
= htons(ETH_TYPE_IP
);
308 dp_packet_set_l3(&packet
, nh
);
309 nh
->ip_ihl_ver
= IP_IHL_VER(5, 4);
310 nh
->ip_tot_len
= htons(IP_HEADER_LEN
+ TCP_HEADER_LEN
);
311 nh
->ip_proto
= IPPROTO_TCP
;
312 nh
->ip_frag_off
= htons(IP_DF
);
313 packet_set_ipv4(&packet
, ip_flow
->nw_src
, ip_flow
->nw_dst
,
314 ip_flow
->nw_tos
, 255);
317 struct tcp_header
*th
= dp_packet_put_zeros(&packet
, sizeof *th
);
318 struct tcp_header
*tcp_in
= dp_packet_l4(pkt_in
);
319 dp_packet_set_l4(&packet
, th
);
320 th
->tcp_ctl
= TCP_CTL(TCP_RST
, 5);
321 if (ip_flow
->tcp_flags
& htons(TCP_ACK
)) {
322 th
->tcp_seq
= tcp_in
->tcp_ack
;
324 uint32_t tcp_seq
, ack_seq
, tcp_len
;
326 tcp_seq
= ntohl(get_16aligned_be32(&tcp_in
->tcp_seq
));
327 tcp_len
= TCP_OFFSET(tcp_in
->tcp_ctl
) * 4;
328 ack_seq
= tcp_seq
+ dp_packet_l4_size(pkt_in
) - tcp_len
;
329 put_16aligned_be32(&th
->tcp_ack
, htonl(ack_seq
));
330 put_16aligned_be32(&th
->tcp_seq
, 0);
332 packet_set_tcp_port(&packet
, ip_flow
->tp_dst
, ip_flow
->tp_src
);
334 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
335 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
336 ip_flow
->vlans
[0].tci
);
339 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
340 dp_packet_uninit(&packet
);
344 pinctrl_handle_put_dhcp_opts(
345 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
346 struct ofpbuf
*userdata
, struct ofpbuf
*continuation
)
348 enum ofp_version version
= rconn_get_version(swconn
);
349 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
350 struct dp_packet
*pkt_out_ptr
= NULL
;
351 uint32_t success
= 0;
353 /* Parse result field. */
354 const struct mf_field
*f
;
355 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
357 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
358 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
362 /* Parse result offset and offer IP. */
363 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
364 ovs_be32
*offer_ip
= ofpbuf_try_pull(userdata
, sizeof *offer_ip
);
365 if (!ofsp
|| !offer_ip
) {
366 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
367 VLOG_WARN_RL(&rl
, "offset or offer_ip not present in the userdata");
371 /* Check that the result is valid and writable. */
372 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
373 ofperr
= mf_check_dst(&dst
, NULL
);
375 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
376 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
380 if (!userdata
->size
) {
381 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
382 VLOG_WARN_RL(&rl
, "DHCP options not present in the userdata");
386 /* Validate the DHCP request packet.
387 * Format of the DHCP packet is
388 * ------------------------------------------------------------------------
389 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
390 * ------------------------------------------------------------------------
392 if (dp_packet_l4_size(pkt_in
) < (UDP_HEADER_LEN
+
393 sizeof (struct dhcp_header
) + sizeof(uint32_t) + 3)) {
394 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
395 VLOG_WARN_RL(&rl
, "Invalid or incomplete DHCP packet recieved");
399 struct dhcp_header
const *in_dhcp_data
= dp_packet_get_udp_payload(pkt_in
);
400 if (in_dhcp_data
->op
!= DHCP_OP_REQUEST
) {
401 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
402 VLOG_WARN_RL(&rl
, "Invalid opcode in the DHCP packet : %d",
407 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
408 * options is the DHCP magic cookie followed by the actual DHCP options.
410 const uint8_t *in_dhcp_opt
=
411 (const uint8_t *)dp_packet_get_udp_payload(pkt_in
) +
412 sizeof (struct dhcp_header
);
414 ovs_be32 magic_cookie
= htonl(DHCP_MAGIC_COOKIE
);
415 if (memcmp(in_dhcp_opt
, &magic_cookie
, sizeof(ovs_be32
))) {
416 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
417 VLOG_WARN_RL(&rl
, "DHCP magic cookie not present in the DHCP packet");
422 /* Check that the DHCP Message Type (opt 53) is present or not with
423 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST as the first
426 if (!(in_dhcp_opt
[0] == DHCP_OPT_MSG_TYPE
&& in_dhcp_opt
[1] == 1 && (
427 in_dhcp_opt
[2] == DHCP_MSG_DISCOVER
||
428 in_dhcp_opt
[2] == DHCP_MSG_REQUEST
))) {
429 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
430 VLOG_WARN_RL(&rl
, "Invalid DHCP message type : opt code = %d,"
431 " opt value = %d", in_dhcp_opt
[0], in_dhcp_opt
[2]);
436 if (in_dhcp_opt
[2] == DHCP_MSG_DISCOVER
) {
437 msg_type
= DHCP_MSG_OFFER
;
439 msg_type
= DHCP_MSG_ACK
;
442 /* Frame the DHCP reply packet
443 * Total DHCP options length will be options stored in the userdata +
446 * --------------------------------------------------------------
447 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
448 * --------------------------------------------------------------
449 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
450 * --------------------------------------------------------------
452 uint16_t new_l4_size
= UDP_HEADER_LEN
+ DHCP_HEADER_LEN
+ \
454 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
456 struct dp_packet pkt_out
;
457 dp_packet_init(&pkt_out
, new_packet_size
);
458 dp_packet_clear(&pkt_out
);
459 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
460 pkt_out_ptr
= &pkt_out
;
462 /* Copy the L2 and L3 headers from the pkt_in as they would remain same*/
464 &pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
), pkt_in
->l4_ofs
);
466 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
467 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
468 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
469 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
471 struct udp_header
*udp
= dp_packet_put(
472 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
474 struct dhcp_header
*dhcp_data
= dp_packet_put(
475 &pkt_out
, dp_packet_pull(pkt_in
, DHCP_HEADER_LEN
), DHCP_HEADER_LEN
);
476 dhcp_data
->op
= DHCP_OP_REPLY
;
477 dhcp_data
->yiaddr
= *offer_ip
;
478 dp_packet_put(&pkt_out
, &magic_cookie
, sizeof(ovs_be32
));
480 uint8_t *out_dhcp_opts
= dp_packet_put_zeros(&pkt_out
,
481 userdata
->size
+ 12);
482 /* DHCP option - type */
483 out_dhcp_opts
[0] = DHCP_OPT_MSG_TYPE
;
484 out_dhcp_opts
[1] = 1;
485 out_dhcp_opts
[2] = msg_type
;
488 memcpy(out_dhcp_opts
, userdata
->data
, userdata
->size
);
489 out_dhcp_opts
+= userdata
->size
;
493 out_dhcp_opts
[0] = DHCP_OPT_END
;
495 udp
->udp_len
= htons(new_l4_size
);
497 struct ip_header
*out_ip
= dp_packet_l3(&pkt_out
);
498 out_ip
->ip_tot_len
= htons(pkt_out
.l4_ofs
- pkt_out
.l3_ofs
+ new_l4_size
);
500 /* Checksum needs to be initialized to zero. */
502 out_ip
->ip_csum
= csum(out_ip
, sizeof *out_ip
);
504 pin
->packet
= dp_packet_data(&pkt_out
);
505 pin
->packet_len
= dp_packet_size(&pkt_out
);
507 /* Log the response. */
508 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(20, 40);
509 const struct eth_header
*l2
= dp_packet_eth(&pkt_out
);
510 VLOG_INFO_RL(&rl
, "DHCP%s "ETH_ADDR_FMT
" "IP_FMT
"",
511 msg_type
== DHCP_MSG_OFFER
? "OFFER" : "ACK",
512 ETH_ADDR_ARGS(l2
->eth_src
), IP_ARGS(*offer_ip
));
517 union mf_subvalue sv
;
519 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
521 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
523 dp_packet_uninit(pkt_out_ptr
);
528 compose_out_dhcpv6_opts(struct ofpbuf
*userdata
,
529 struct ofpbuf
*out_dhcpv6_opts
, ovs_be32 iaid
)
531 while (userdata
->size
) {
532 struct dhcp_opt6_header
*userdata_opt
= ofpbuf_try_pull(
533 userdata
, sizeof *userdata_opt
);
538 size_t size
= ntohs(userdata_opt
->size
);
539 uint8_t *userdata_opt_data
= ofpbuf_try_pull(userdata
, size
);
540 if (!userdata_opt_data
) {
544 switch (ntohs(userdata_opt
->opt_code
)) {
545 case DHCPV6_OPT_SERVER_ID_CODE
:
547 /* The Server Identifier option carries a DUID
548 * identifying a server between a client and a server.
549 * See RFC 3315 Sec 9 and Sec 22.3.
551 * We use DUID Based on Link-layer Address [DUID-LL].
554 struct dhcpv6_opt_server_id
*opt_server_id
= ofpbuf_put_zeros(
555 out_dhcpv6_opts
, sizeof *opt_server_id
);
557 opt_server_id
->opt
.code
= htons(DHCPV6_OPT_SERVER_ID_CODE
);
558 opt_server_id
->opt
.len
= htons(size
+ 4);
559 opt_server_id
->duid_type
= htons(DHCPV6_DUID_LL
);
560 opt_server_id
->hw_type
= htons(DHCPV6_HW_TYPE_ETH
);
561 memcpy(&opt_server_id
->mac
, userdata_opt_data
,
562 sizeof(struct eth_addr
));
566 case DHCPV6_OPT_IA_ADDR_CODE
:
568 if (size
!= sizeof(struct in6_addr
)) {
572 /* IA Address option is used to specify IPv6 addresses associated
573 * with an IA_NA or IA_TA. The IA Address option must be
574 * encapsulated in the Options field of an IA_NA or IA_TA option.
576 * We will encapsulate the IA Address within the IA_NA option.
577 * Please see RFC 3315 section 22.5 and 22.6
579 struct dhcpv6_opt_ia_na
*opt_ia_na
= ofpbuf_put_zeros(
580 out_dhcpv6_opts
, sizeof *opt_ia_na
);
581 opt_ia_na
->opt
.code
= htons(DHCPV6_OPT_IA_NA_CODE
);
582 /* IA_NA length (in bytes)-
586 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
588 opt_ia_na
->opt
.len
= htons(12 + sizeof(struct dhcpv6_opt_ia_addr
));
589 opt_ia_na
->iaid
= iaid
;
590 /* Set the lifetime of the address(es) to infinity */
591 opt_ia_na
->t1
= OVS_BE32_MAX
;
592 opt_ia_na
->t2
= OVS_BE32_MAX
;
594 struct dhcpv6_opt_ia_addr
*opt_ia_addr
= ofpbuf_put_zeros(
595 out_dhcpv6_opts
, sizeof *opt_ia_addr
);
596 opt_ia_addr
->opt
.code
= htons(DHCPV6_OPT_IA_ADDR_CODE
);
597 opt_ia_addr
->opt
.len
= htons(size
+ 8);
598 memcpy(opt_ia_addr
->ipv6
.s6_addr
, userdata_opt_data
, size
);
599 opt_ia_addr
->t1
= OVS_BE32_MAX
;
600 opt_ia_addr
->t2
= OVS_BE32_MAX
;
604 case DHCPV6_OPT_DNS_SERVER_CODE
:
606 struct dhcpv6_opt_header
*opt_dns
= ofpbuf_put_zeros(
607 out_dhcpv6_opts
, sizeof *opt_dns
);
608 opt_dns
->code
= htons(DHCPV6_OPT_DNS_SERVER_CODE
);
609 opt_dns
->len
= htons(size
);
610 ofpbuf_put(out_dhcpv6_opts
, userdata_opt_data
, size
);
614 case DHCPV6_OPT_DOMAIN_SEARCH_CODE
:
616 struct dhcpv6_opt_header
*opt_dsl
= ofpbuf_put_zeros(
617 out_dhcpv6_opts
, sizeof *opt_dsl
);
618 opt_dsl
->code
= htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE
);
619 opt_dsl
->len
= htons(size
+ 2);
620 uint8_t *data
= ofpbuf_put_zeros(out_dhcpv6_opts
, size
+ 2);
622 memcpy(data
+ 1, userdata_opt_data
, size
);
634 pinctrl_handle_put_dhcpv6_opts(
635 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
636 struct ofpbuf
*userdata
, struct ofpbuf
*continuation OVS_UNUSED
)
638 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
639 enum ofp_version version
= rconn_get_version(swconn
);
640 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
641 struct dp_packet
*pkt_out_ptr
= NULL
;
642 uint32_t success
= 0;
644 /* Parse result field. */
645 const struct mf_field
*f
;
646 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
648 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
652 /* Parse result offset. */
653 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
655 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
659 /* Check that the result is valid and writable. */
660 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
661 ofperr
= mf_check_dst(&dst
, NULL
);
663 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
667 if (!userdata
->size
) {
668 VLOG_WARN_RL(&rl
, "DHCPv6 options not present in the userdata");
672 struct udp_header
*in_udp
= dp_packet_l4(pkt_in
);
673 const uint8_t *in_dhcpv6_data
= dp_packet_get_udp_payload(pkt_in
);
674 if (!in_udp
|| !in_dhcpv6_data
) {
675 VLOG_WARN_RL(&rl
, "truncated dhcpv6 packet");
679 uint8_t out_dhcpv6_msg_type
;
680 switch(*in_dhcpv6_data
) {
681 case DHCPV6_MSG_TYPE_SOLICIT
:
682 out_dhcpv6_msg_type
= DHCPV6_MSG_TYPE_ADVT
;
685 case DHCPV6_MSG_TYPE_REQUEST
:
686 case DHCPV6_MSG_TYPE_CONFIRM
:
687 case DHCPV6_MSG_TYPE_DECLINE
:
688 out_dhcpv6_msg_type
= DHCPV6_MSG_TYPE_REPLY
;
692 /* Invalid or unsupported DHCPv6 message type */
696 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes). */
698 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
699 * solicit/request/confirm packet and copy the same IAID in the Server's
702 struct dhcpv6_opt_header
const *in_opt_client_id
= NULL
;
703 size_t udp_len
= ntohs(in_udp
->udp_len
);
704 size_t l4_len
= dp_packet_l4_size(pkt_in
);
705 uint8_t *end
= (uint8_t *)in_udp
+ MIN(udp_len
, l4_len
);
706 while (in_dhcpv6_data
< end
) {
707 struct dhcpv6_opt_header
const *in_opt
=
708 (struct dhcpv6_opt_header
*)in_dhcpv6_data
;
709 switch(ntohs(in_opt
->code
)) {
710 case DHCPV6_OPT_IA_NA_CODE
:
712 struct dhcpv6_opt_ia_na
*opt_ia_na
= (
713 struct dhcpv6_opt_ia_na
*)in_opt
;
714 iaid
= opt_ia_na
->iaid
;
718 case DHCPV6_OPT_CLIENT_ID_CODE
:
719 in_opt_client_id
= in_opt
;
725 in_dhcpv6_data
+= sizeof *in_opt
+ ntohs(in_opt
->len
);
728 if (!in_opt_client_id
) {
729 VLOG_WARN_RL(&rl
, "DHCPv6 option - Client id not present in the "
735 VLOG_WARN_RL(&rl
, "DHCPv6 option - IA NA not present in the "
740 uint64_t out_ofpacts_dhcpv6_opts_stub
[256 / 8];
741 struct ofpbuf out_dhcpv6_opts
=
742 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub
);
744 if (!compose_out_dhcpv6_opts(userdata
, &out_dhcpv6_opts
, iaid
)) {
745 VLOG_WARN_RL(&rl
, "Invalid userdata");
750 = (UDP_HEADER_LEN
+ 4 + sizeof *in_opt_client_id
+
751 ntohs(in_opt_client_id
->len
) + out_dhcpv6_opts
.size
);
752 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
754 struct dp_packet pkt_out
;
755 dp_packet_init(&pkt_out
, new_packet_size
);
756 dp_packet_clear(&pkt_out
);
757 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
758 pkt_out_ptr
= &pkt_out
;
760 /* Copy L2 and L3 headers from pkt_in. */
761 dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
),
764 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
765 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
766 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
767 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
769 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
770 * Need to preserve the transaction id in the DHCPv6 reply packet. */
771 struct udp_header
*out_udp
= dp_packet_put(
772 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
773 uint8_t *out_dhcpv6
= dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, 4), 4);
775 /* Set the proper DHCPv6 message type. */
776 *out_dhcpv6
= out_dhcpv6_msg_type
;
778 /* Copy the Client Identifier. */
779 dp_packet_put(&pkt_out
, in_opt_client_id
,
780 sizeof *in_opt_client_id
+ ntohs(in_opt_client_id
->len
));
782 /* Copy the DHCPv6 Options. */
783 dp_packet_put(&pkt_out
, out_dhcpv6_opts
.data
, out_dhcpv6_opts
.size
);
784 out_udp
->udp_len
= htons(new_l4_size
);
785 out_udp
->udp_csum
= 0;
787 struct ovs_16aligned_ip6_hdr
*out_ip6
= dp_packet_l3(&pkt_out
);
788 out_ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
= out_udp
->udp_len
;
791 csum
= packet_csum_pseudoheader6(dp_packet_l3(&pkt_out
));
792 csum
= csum_continue(csum
, out_udp
, dp_packet_size(&pkt_out
) -
793 ((const unsigned char *)out_udp
-
794 (const unsigned char *)dp_packet_eth(&pkt_out
)));
795 out_udp
->udp_csum
= csum_finish(csum
);
796 if (!out_udp
->udp_csum
) {
797 out_udp
->udp_csum
= htons(0xffff);
800 pin
->packet
= dp_packet_data(&pkt_out
);
801 pin
->packet_len
= dp_packet_size(&pkt_out
);
802 ofpbuf_uninit(&out_dhcpv6_opts
);
806 union mf_subvalue sv
;
808 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
810 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
811 dp_packet_uninit(pkt_out_ptr
);
815 put_be16(struct ofpbuf
*buf
, ovs_be16 x
)
817 ofpbuf_put(buf
, &x
, sizeof x
);
821 put_be32(struct ofpbuf
*buf
, ovs_be32 x
)
823 ofpbuf_put(buf
, &x
, sizeof x
);
827 pinctrl_handle_dns_lookup(
828 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
829 struct ofpbuf
*userdata
, struct ofpbuf
*continuation
,
830 struct controller_ctx
*ctx
)
832 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
833 enum ofp_version version
= rconn_get_version(swconn
);
834 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
835 struct dp_packet
*pkt_out_ptr
= NULL
;
836 uint32_t success
= 0;
838 /* Parse result field. */
839 const struct mf_field
*f
;
840 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
842 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
846 /* Parse result offset. */
847 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
849 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
853 /* Check that the result is valid and writable. */
854 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
855 ofperr
= mf_check_dst(&dst
, NULL
);
857 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
861 /* Extract the DNS header */
862 struct dns_header
const *in_dns_header
= dp_packet_get_udp_payload(pkt_in
);
863 if (!in_dns_header
) {
864 VLOG_WARN_RL(&rl
, "truncated dns packet");
868 /* Check if it is DNS request or not */
869 if (in_dns_header
->lo_flag
& 0x80) {
870 /* It's a DNS response packet which we are not interested in */
874 /* Check if at least one query request is present */
875 if (!in_dns_header
->qdcount
) {
879 struct udp_header
*in_udp
= dp_packet_l4(pkt_in
);
880 size_t udp_len
= ntohs(in_udp
->udp_len
);
881 size_t l4_len
= dp_packet_l4_size(pkt_in
);
882 uint8_t *end
= (uint8_t *)in_udp
+ MIN(udp_len
, l4_len
);
883 uint8_t *in_dns_data
= (uint8_t *)(in_dns_header
+ 1);
884 uint8_t *in_queryname
= in_dns_data
;
886 struct ds query_name
;
887 ds_init(&query_name
);
888 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
889 * encoded as (in hex) - 03 77 77 77 03 6f 76 63 03 6f 72 67 00.
891 while ((in_dns_data
+ idx
) < end
&& in_dns_data
[idx
]) {
892 uint8_t label_len
= in_dns_data
[idx
++];
893 if (in_dns_data
+ idx
+ label_len
> end
) {
894 ds_destroy(&query_name
);
897 ds_put_buffer(&query_name
, (const char *) in_dns_data
+ idx
, label_len
);
899 ds_put_char(&query_name
, '.');
903 ds_chomp(&query_name
, '.');
906 /* Query should have TYPE and CLASS fields */
907 if (in_dns_data
+ (2 * sizeof(ovs_be16
)) > end
) {
908 ds_destroy(&query_name
);
912 uint16_t query_type
= ntohs(*ALIGNED_CAST(const ovs_be16
*, in_dns_data
));
913 /* Supported query types - A, AAAA and ANY */
914 if (!(query_type
== DNS_QUERY_TYPE_A
|| query_type
== DNS_QUERY_TYPE_AAAA
915 || query_type
== DNS_QUERY_TYPE_ANY
)) {
916 ds_destroy(&query_name
);
920 uint64_t dp_key
= ntohll(pin
->flow_metadata
.flow
.metadata
);
921 const struct sbrec_dns
*sbrec_dns
;
922 const char *answer_ips
= NULL
;
923 SBREC_DNS_FOR_EACH(sbrec_dns
, ctx
->ovnsb_idl
) {
924 for (size_t i
= 0; i
< sbrec_dns
->n_datapaths
; i
++) {
925 if (sbrec_dns
->datapaths
[i
]->tunnel_key
== dp_key
) {
926 answer_ips
= smap_get(&sbrec_dns
->records
,
927 ds_cstr(&query_name
));
939 ds_destroy(&query_name
);
944 struct lport_addresses ip_addrs
;
945 if (!extract_ip_addresses(answer_ips
, &ip_addrs
)) {
949 uint16_t ancount
= 0;
950 uint64_t dns_ans_stub
[128 / 8];
951 struct ofpbuf dns_answer
= OFPBUF_STUB_INITIALIZER(dns_ans_stub
);
953 if (query_type
== DNS_QUERY_TYPE_A
|| query_type
== DNS_QUERY_TYPE_ANY
) {
954 for (size_t i
= 0; i
< ip_addrs
.n_ipv4_addrs
; i
++) {
955 /* Copy the answer section */
956 /* Format of the answer section is
957 * - NAME -> The domain name
958 * - TYPE -> 2 octets containing one of the RR type codes
959 * - CLASS -> 2 octets which specify the class of the data
960 * in the RDATA field.
961 * - TTL -> 32 bit unsigned int specifying the time
962 * interval (in secs) that the resource record
963 * may be cached before it should be discarded.
964 * - RDLENGTH -> 16 bit integer specifying the length of the
966 * - RDATA -> a variable length string of octets that
967 * describes the resource. In our case it will
968 * be IP address of the domain name.
970 ofpbuf_put(&dns_answer
, in_queryname
, idx
);
971 put_be16(&dns_answer
, htons(DNS_QUERY_TYPE_A
));
972 put_be16(&dns_answer
, htons(DNS_CLASS_IN
));
973 put_be32(&dns_answer
, htonl(DNS_DEFAULT_RR_TTL
));
974 put_be16(&dns_answer
, htons(sizeof(ovs_be32
)));
975 put_be32(&dns_answer
, ip_addrs
.ipv4_addrs
[i
].addr
);
980 if (query_type
== DNS_QUERY_TYPE_AAAA
||
981 query_type
== DNS_QUERY_TYPE_ANY
) {
982 for (size_t i
= 0; i
< ip_addrs
.n_ipv6_addrs
; i
++) {
983 ofpbuf_put(&dns_answer
, in_queryname
, idx
);
984 put_be16(&dns_answer
, htons(DNS_QUERY_TYPE_AAAA
));
985 put_be16(&dns_answer
, htons(DNS_CLASS_IN
));
986 put_be32(&dns_answer
, htonl(DNS_DEFAULT_RR_TTL
));
987 const struct in6_addr
*ip6
= &ip_addrs
.ipv6_addrs
[i
].addr
;
988 put_be16(&dns_answer
, htons(sizeof *ip6
));
989 ofpbuf_put(&dns_answer
, ip6
, sizeof *ip6
);
994 destroy_lport_addresses(&ip_addrs
);
997 ofpbuf_uninit(&dns_answer
);
1001 uint16_t new_l4_size
= ntohs(in_udp
->udp_len
) + dns_answer
.size
;
1002 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
1003 struct dp_packet pkt_out
;
1004 dp_packet_init(&pkt_out
, new_packet_size
);
1005 dp_packet_clear(&pkt_out
);
1006 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
1007 pkt_out_ptr
= &pkt_out
;
1009 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1011 &pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
), pkt_in
->l4_ofs
);
1013 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
1014 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
1015 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
1016 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
1018 struct udp_header
*out_udp
= dp_packet_put(
1019 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
1021 /* Copy the DNS header. */
1022 struct dns_header
*out_dns_header
= dp_packet_put(
1023 &pkt_out
, dp_packet_pull(pkt_in
, sizeof *out_dns_header
),
1024 sizeof *out_dns_header
);
1026 /* Set the response bit to 1 in the flags. */
1027 out_dns_header
->lo_flag
|= 0x80;
1029 /* Set the answer RR. */
1030 out_dns_header
->ancount
= htons(ancount
);
1032 /* Copy the Query section. */
1033 dp_packet_put(&pkt_out
, dp_packet_data(pkt_in
), dp_packet_size(pkt_in
));
1035 /* Copy the answer sections. */
1036 dp_packet_put(&pkt_out
, dns_answer
.data
, dns_answer
.size
);
1037 ofpbuf_uninit(&dns_answer
);
1039 out_udp
->udp_len
= htons(new_l4_size
);
1040 out_udp
->udp_csum
= 0;
1042 struct eth_header
*eth
= dp_packet_data(&pkt_out
);
1043 if (eth
->eth_type
== htons(ETH_TYPE_IP
)) {
1044 struct ip_header
*out_ip
= dp_packet_l3(&pkt_out
);
1045 out_ip
->ip_tot_len
= htons(pkt_out
.l4_ofs
- pkt_out
.l3_ofs
1047 /* Checksum needs to be initialized to zero. */
1048 out_ip
->ip_csum
= 0;
1049 out_ip
->ip_csum
= csum(out_ip
, sizeof *out_ip
);
1051 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(&pkt_out
);
1052 nh
->ip6_plen
= htons(new_l4_size
);
1054 /* IPv6 needs UDP checksum calculated */
1056 csum
= packet_csum_pseudoheader6(nh
);
1057 csum
= csum_continue(csum
, out_udp
, dp_packet_size(&pkt_out
) -
1058 ((const unsigned char *)out_udp
-
1059 (const unsigned char *)eth
));
1060 out_udp
->udp_csum
= csum_finish(csum
);
1061 if (!out_udp
->udp_csum
) {
1062 out_udp
->udp_csum
= htons(0xffff);
1066 pin
->packet
= dp_packet_data(&pkt_out
);
1067 pin
->packet_len
= dp_packet_size(&pkt_out
);
1072 union mf_subvalue sv
;
1073 sv
.u8_val
= success
;
1074 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
1076 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
1077 dp_packet_uninit(pkt_out_ptr
);
1081 process_packet_in(const struct ofp_header
*msg
, struct controller_ctx
*ctx
)
1083 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1085 struct ofputil_packet_in pin
;
1086 struct ofpbuf continuation
;
1087 enum ofperr error
= ofputil_decode_packet_in(msg
, true, NULL
, NULL
, &pin
,
1088 NULL
, NULL
, &continuation
);
1091 VLOG_WARN_RL(&rl
, "error decoding packet-in: %s",
1092 ofperr_to_string(error
));
1095 if (pin
.reason
!= OFPR_ACTION
) {
1099 struct ofpbuf userdata
= ofpbuf_const_initializer(pin
.userdata
,
1101 const struct action_header
*ah
= ofpbuf_pull(&userdata
, sizeof *ah
);
1103 VLOG_WARN_RL(&rl
, "packet-in userdata lacks action header");
1107 struct dp_packet packet
;
1108 dp_packet_use_const(&packet
, pin
.packet
, pin
.packet_len
);
1109 struct flow headers
;
1110 flow_extract(&packet
, &headers
);
1112 switch (ntohl(ah
->opcode
)) {
1113 case ACTION_OPCODE_ARP
:
1114 pinctrl_handle_arp(&headers
, &pin
.flow_metadata
, &userdata
);
1117 case ACTION_OPCODE_PUT_ARP
:
1118 pinctrl_handle_put_mac_binding(&pin
.flow_metadata
.flow
, &headers
,
1122 case ACTION_OPCODE_PUT_DHCP_OPTS
:
1123 pinctrl_handle_put_dhcp_opts(&packet
, &pin
, &userdata
, &continuation
);
1126 case ACTION_OPCODE_ND_NA
:
1127 pinctrl_handle_nd_na(&headers
, &pin
.flow_metadata
, &userdata
);
1130 case ACTION_OPCODE_PUT_ND
:
1131 pinctrl_handle_put_mac_binding(&pin
.flow_metadata
.flow
, &headers
,
1135 case ACTION_OPCODE_PUT_DHCPV6_OPTS
:
1136 pinctrl_handle_put_dhcpv6_opts(&packet
, &pin
, &userdata
,
1140 case ACTION_OPCODE_DNS_LOOKUP
:
1141 pinctrl_handle_dns_lookup(&packet
, &pin
, &userdata
, &continuation
, ctx
);
1144 case ACTION_OPCODE_LOG
:
1145 handle_acl_log(&headers
, &userdata
);
1148 case ACTION_OPCODE_PUT_ND_RA_OPTS
:
1149 pinctrl_handle_put_nd_ra_opts(&headers
, &packet
, &pin
, &userdata
,
1153 case ACTION_OPCODE_ND_NS
:
1154 pinctrl_handle_nd_ns(&headers
, &pin
.flow_metadata
, &userdata
);
1157 case ACTION_OPCODE_ICMP4
:
1158 pinctrl_handle_icmp4(&headers
, &pin
.flow_metadata
, &userdata
);
1161 case ACTION_OPCODE_TCP_RESET
:
1162 pinctrl_handle_tcp_reset(&headers
, &packet
, &pin
.flow_metadata
,
1167 VLOG_WARN_RL(&rl
, "unrecognized packet-in opcode %"PRIu32
,
1174 pinctrl_recv(const struct ofp_header
*oh
, enum ofptype type
,
1175 struct controller_ctx
*ctx
)
1177 if (type
== OFPTYPE_ECHO_REQUEST
) {
1178 queue_msg(ofputil_encode_echo_reply(oh
));
1179 } else if (type
== OFPTYPE_GET_CONFIG_REPLY
) {
1180 /* Enable asynchronous messages */
1181 struct ofputil_switch_config config
;
1183 ofputil_decode_get_config_reply(oh
, &config
);
1184 config
.miss_send_len
= UINT16_MAX
;
1185 set_switch_config(swconn
, &config
);
1186 } else if (type
== OFPTYPE_PACKET_IN
) {
1187 process_packet_in(oh
, ctx
);
1189 if (VLOG_IS_DBG_ENABLED()) {
1190 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
1192 char *s
= ofp_to_string(oh
, ntohs(oh
->length
), NULL
, NULL
, 2);
1194 VLOG_DBG_RL(&rl
, "OpenFlow packet ignored: %s", s
);
1201 pinctrl_run(struct controller_ctx
*ctx
,
1202 const struct ovsrec_bridge
*br_int
,
1203 const struct sbrec_chassis
*chassis
,
1204 const struct chassis_index
*chassis_index
,
1205 struct hmap
*local_datapaths
,
1206 struct sset
*active_tunnels
)
1208 char *target
= xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int
->name
);
1209 if (strcmp(target
, rconn_get_target(swconn
))) {
1210 VLOG_INFO("%s: connecting to switch", target
);
1211 rconn_connect(swconn
, target
, target
);
1217 if (!rconn_is_connected(swconn
)) {
1221 if (conn_seq_no
!= rconn_get_connection_seqno(swconn
)) {
1223 conn_seq_no
= rconn_get_connection_seqno(swconn
);
1224 flush_put_mac_bindings();
1227 /* Process a limited number of messages per call. */
1228 for (int i
= 0; i
< 50; i
++) {
1229 struct ofpbuf
*msg
= rconn_recv(swconn
);
1234 const struct ofp_header
*oh
= msg
->data
;
1237 ofptype_decode(&type
, oh
);
1238 pinctrl_recv(oh
, type
, ctx
);
1242 run_put_mac_bindings(ctx
);
1243 send_garp_run(ctx
, br_int
, chassis
, chassis_index
, local_datapaths
,
1245 send_ipv6_ras(ctx
, local_datapaths
);
1248 /* Table of ipv6_ra_state structures, keyed on logical port name */
1249 static struct shash ipv6_ras
;
1251 /* Next IPV6 RA in seconds. */
1252 static long long int send_ipv6_ra_time
;
1254 struct ipv6_ra_config
{
1255 time_t min_interval
;
1256 time_t max_interval
;
1257 struct eth_addr eth_src
;
1258 struct eth_addr eth_dst
;
1259 struct in6_addr ipv6_src
;
1260 struct in6_addr ipv6_dst
;
1262 uint8_t mo_flags
; /* Managed/Other flags for RAs */
1263 uint8_t la_flags
; /* On-link/autonomous flags for address prefixes */
1264 struct lport_addresses prefixes
;
/* Run-time state for periodic router advertisements on one logical port;
 * stored in 'ipv6_ras'. */
struct ipv6_ra_state {
    long long int next_announce;    /* Time of the next RA, in ms. */
    struct ipv6_ra_config *config;  /* Owned by this structure. */
    int64_t port_key;               /* Tunnel key of the peer port. */
    int64_t metadata;               /* Tunnel key of the peer's datapath. */
    bool delete_me;                 /* Mark used by send_ipv6_ras() to find
                                     * stale entries. */
};
1278 shash_init(&ipv6_ras
);
1279 send_ipv6_ra_time
= LLONG_MAX
;
1283 ipv6_ra_config_delete(struct ipv6_ra_config
*config
)
1286 destroy_lport_addresses(&config
->prefixes
);
1292 ipv6_ra_delete(struct ipv6_ra_state
*ra
)
1295 ipv6_ra_config_delete(ra
->config
);
1301 destroy_ipv6_ras(void)
1303 struct shash_node
*iter
, *next
;
1304 SHASH_FOR_EACH_SAFE (iter
, next
, &ipv6_ras
) {
1305 struct ipv6_ra_state
*ra
= iter
->data
;
1307 shash_delete(&ipv6_ras
, iter
);
1309 shash_destroy(&ipv6_ras
);
1312 static struct ipv6_ra_config
*
1313 ipv6_ra_update_config(const struct sbrec_port_binding
*pb
)
1315 struct ipv6_ra_config
*config
;
1317 config
= xzalloc(sizeof *config
);
1319 config
->max_interval
= smap_get_int(&pb
->options
, "ipv6_ra_max_interval",
1320 ND_RA_MAX_INTERVAL_DEFAULT
);
1321 config
->min_interval
= smap_get_int(&pb
->options
, "ipv6_ra_min_interval",
1322 nd_ra_min_interval_default(config
->max_interval
));
1323 config
->mtu
= smap_get_int(&pb
->options
, "ipv6_ra_mtu", ND_MTU_DEFAULT
);
1324 config
->la_flags
= ND_PREFIX_ON_LINK
;
1326 const char *address_mode
= smap_get(&pb
->options
, "ipv6_ra_address_mode");
1327 if (!address_mode
) {
1328 VLOG_WARN("No address mode specified");
1331 if (!strcmp(address_mode
, "dhcpv6_stateless")) {
1332 config
->mo_flags
= IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG
;
1333 } else if (!strcmp(address_mode
, "dhcpv6_stateful")) {
1334 config
->mo_flags
= IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG
;
1335 } else if (!strcmp(address_mode
, "slaac")) {
1336 config
->la_flags
|= ND_PREFIX_AUTONOMOUS_ADDRESS
;
1338 VLOG_WARN("Invalid address mode %s", address_mode
);
1342 const char *prefixes
= smap_get(&pb
->options
, "ipv6_ra_prefixes");
1343 if (prefixes
&& !extract_ip_addresses(prefixes
, &config
->prefixes
)) {
1344 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes
);
1348 /* All nodes multicast addresses */
1349 config
->eth_dst
= (struct eth_addr
) ETH_ADDR_C(33,33,00,00,00,01);
1350 ipv6_parse("ff02::1", &config
->ipv6_dst
);
1352 const char *eth_addr
= smap_get(&pb
->options
, "ipv6_ra_src_eth");
1353 if (!eth_addr
|| !eth_addr_from_string(eth_addr
, &config
->eth_src
)) {
1354 VLOG_WARN("Invalid ethernet source %s", eth_addr
);
1357 const char *ip_addr
= smap_get(&pb
->options
, "ipv6_ra_src_addr");
1358 if (!ip_addr
|| !ipv6_parse(ip_addr
, &config
->ipv6_src
)) {
1359 VLOG_WARN("Invalid IP source %s", ip_addr
);
1366 ipv6_ra_config_delete(config
);
/* Returns the time, in milliseconds on the time_msec() clock, of the next
 * periodic RA: the current time plus a random delay chosen between
 * 'min_interval' and 'max_interval' seconds.
 *
 * If 'max_interval' does not exceed 'min_interval' the delay is exactly
 * 'min_interval', because random_range() must not be given a zero (or
 * wrapped negative) range. */
static long long int
ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
{
    long long int min_interval_ms = min_interval * 1000LL;
    long long int max_interval_ms = max_interval * 1000LL;
    long long int span_ms = max_interval_ms - min_interval_ms;
    long long int next = time_msec() + min_interval_ms;

    if (span_ms > 0) {
        /* random_range() takes a 32-bit bound. */
        next += random_range(MIN(span_ms, UINT32_MAX));
    }
    return next;
}
1381 put_load(uint64_t value
, enum mf_field_id dst
, int ofs
, int n_bits
,
1382 struct ofpbuf
*ofpacts
)
1384 struct ofpact_set_field
*sf
= ofpact_put_set_field(ofpacts
,
1385 mf_from_id(dst
), NULL
,
1387 ovs_be64 n_value
= htonll(value
);
1388 bitwise_copy(&n_value
, 8, 0, sf
->value
, sf
->field
->n_bytes
, ofs
, n_bits
);
1389 bitwise_one(ofpact_set_field_mask(sf
), sf
->field
->n_bytes
, ofs
, n_bits
);
1392 static long long int
1393 ipv6_ra_send(struct ipv6_ra_state
*ra
)
1395 if (time_msec() < ra
->next_announce
) {
1396 return ra
->next_announce
;
1399 uint64_t packet_stub
[128 / 8];
1400 struct dp_packet packet
;
1401 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
1402 compose_nd_ra(&packet
, ra
->config
->eth_src
, ra
->config
->eth_dst
,
1403 &ra
->config
->ipv6_src
, &ra
->config
->ipv6_dst
,
1404 255, ra
->config
->mo_flags
, 0, 0, 0, ra
->config
->mtu
);
1406 for (int i
= 0; i
< ra
->config
->prefixes
.n_ipv6_addrs
; i
++) {
1408 memcpy(&addr
, &ra
->config
->prefixes
.ipv6_addrs
[i
].addr
, sizeof addr
);
1409 packet_put_ra_prefix_opt(&packet
,
1410 ra
->config
->prefixes
.ipv6_addrs
[i
].plen
,
1411 ra
->config
->la_flags
, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME
),
1412 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME
), addr
);
1415 uint64_t ofpacts_stub
[4096 / 8];
1416 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
1418 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1419 uint32_t dp_key
= ra
->metadata
;
1420 uint32_t port_key
= ra
->port_key
;
1421 put_load(dp_key
, MFF_LOG_DATAPATH
, 0, 64, &ofpacts
);
1422 put_load(port_key
, MFF_LOG_INPORT
, 0, 32, &ofpacts
);
1423 put_load(1, MFF_LOG_FLAGS
, MLF_LOCAL_ONLY_BIT
, 1, &ofpacts
);
1424 struct ofpact_resubmit
*resubmit
= ofpact_put_RESUBMIT(&ofpacts
);
1425 resubmit
->in_port
= OFPP_CONTROLLER
;
1426 resubmit
->table_id
= OFTABLE_LOG_INGRESS_PIPELINE
;
1428 struct ofputil_packet_out po
= {
1429 .packet
= dp_packet_data(&packet
),
1430 .packet_len
= dp_packet_size(&packet
),
1431 .buffer_id
= UINT32_MAX
,
1432 .ofpacts
= ofpacts
.data
,
1433 .ofpacts_len
= ofpacts
.size
,
1436 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
1437 enum ofp_version version
= rconn_get_version(swconn
);
1438 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
1439 queue_msg(ofputil_encode_packet_out(&po
, proto
));
1440 dp_packet_uninit(&packet
);
1441 ofpbuf_uninit(&ofpacts
);
1443 ra
->next_announce
= ipv6_ra_calc_next_announce(ra
->config
->min_interval
,
1444 ra
->config
->max_interval
);
1446 return ra
->next_announce
;
1452 poll_timer_wait_until(send_ipv6_ra_time
);
1456 send_ipv6_ras(const struct controller_ctx
*ctx
, struct hmap
*local_datapaths
)
1458 struct shash_node
*iter
, *iter_next
;
1460 send_ipv6_ra_time
= LLONG_MAX
;
1462 SHASH_FOR_EACH (iter
, &ipv6_ras
) {
1463 struct ipv6_ra_state
*ra
= iter
->data
;
1464 ra
->delete_me
= true;
1467 const struct local_datapath
*ld
;
1468 HMAP_FOR_EACH (ld
, hmap_node
, local_datapaths
) {
1469 struct sbrec_port_binding
*lpval
;
1470 const struct sbrec_port_binding
*pb
;
1471 struct ovsdb_idl_index_cursor cursor
;
1473 lpval
= sbrec_port_binding_index_init_row(ctx
->ovnsb_idl
,
1474 &sbrec_table_port_binding
);
1475 sbrec_port_binding_index_set_datapath(lpval
, ld
->datapath
);
1476 ovsdb_idl_initialize_cursor(ctx
->ovnsb_idl
, &sbrec_table_port_binding
,
1477 "lport-by-datapath", &cursor
);
1478 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, &cursor
, lpval
) {
1479 if (!smap_get_bool(&pb
->options
, "ipv6_ra_send_periodic", false)) {
1483 const char *peer_s
= smap_get(&pb
->options
, "peer");
1488 const struct sbrec_port_binding
*peer
1489 = lport_lookup_by_name(ctx
->ovnsb_idl
, peer_s
);
1494 struct ipv6_ra_config
*config
= ipv6_ra_update_config(pb
);
1499 struct ipv6_ra_state
*ra
1500 = shash_find_data(&ipv6_ras
, pb
->logical_port
);
1502 ra
= xzalloc(sizeof *ra
);
1503 ra
->config
= config
;
1504 ra
->next_announce
= ipv6_ra_calc_next_announce(
1505 ra
->config
->min_interval
,
1506 ra
->config
->max_interval
);
1507 shash_add(&ipv6_ras
, pb
->logical_port
, ra
);
1509 ipv6_ra_config_delete(ra
->config
);
1510 ra
->config
= config
;
1513 /* Peer is the logical switch port that the logical
1514 * router port is connected to. The RA is injected
1515 * into that logical switch port.
1517 ra
->port_key
= peer
->tunnel_key
;
1518 ra
->metadata
= peer
->datapath
->tunnel_key
;
1519 ra
->delete_me
= false;
1521 long long int next_ra
= ipv6_ra_send(ra
);
1522 if (send_ipv6_ra_time
> next_ra
) {
1523 send_ipv6_ra_time
= next_ra
;
1526 sbrec_port_binding_index_destroy_row(lpval
);
1529 /* Remove those that are no longer in the SB database */
1530 SHASH_FOR_EACH_SAFE (iter
, iter_next
, &ipv6_ras
) {
1531 struct ipv6_ra_state
*ra
= iter
->data
;
1532 if (ra
->delete_me
) {
1533 shash_delete(&ipv6_ras
, iter
);
1540 pinctrl_wait(struct controller_ctx
*ctx
)
1542 wait_put_mac_bindings(ctx
);
1543 rconn_run_wait(swconn
);
1544 rconn_recv_wait(swconn
);
1550 pinctrl_destroy(void)
1552 rconn_destroy(swconn
);
1553 destroy_put_mac_bindings();
1554 destroy_send_garps();
/* Implementation of the "put_arp" and "put_nd" OVN actions.  These
 * actions send a packet to ovn-controller, using the flow as an API
 * (see actions.h for details).  This code implements the actions by
 * updating the MAC_Binding table in the southbound database.
 *
 * This code could be a lot simpler if the database could always be updated,
 * but in fact we can only update it when ctx->ovnsb_idl_txn is nonnull.  Thus,
 * we buffer up a few put_mac_bindings (but we don't keep them longer
 * than 1 second) and apply them whenever a database transaction is
 * available. */
1569 /* Buffered "put_mac_binding" operation. */
1570 struct put_mac_binding
{
1571 struct hmap_node hmap_node
; /* In 'put_mac_bindings'. */
1573 long long int timestamp
; /* In milliseconds. */
1578 char ip_s
[INET6_ADDRSTRLEN
+ 1];
1581 struct eth_addr mac
;
1584 /* Contains "struct put_mac_binding"s. */
1585 static struct hmap put_mac_bindings
;
1588 init_put_mac_bindings(void)
1590 hmap_init(&put_mac_bindings
);
1594 destroy_put_mac_bindings(void)
1596 flush_put_mac_bindings();
1597 hmap_destroy(&put_mac_bindings
);
1600 static struct put_mac_binding
*
1601 pinctrl_find_put_mac_binding(uint32_t dp_key
, uint32_t port_key
,
1602 const char *ip_s
, uint32_t hash
)
1604 struct put_mac_binding
*pa
;
1605 HMAP_FOR_EACH_WITH_HASH (pa
, hmap_node
, hash
, &put_mac_bindings
) {
1606 if (pa
->dp_key
== dp_key
1607 && pa
->port_key
== port_key
1608 && !strcmp(pa
->ip_s
, ip_s
)) {
1616 pinctrl_handle_put_mac_binding(const struct flow
*md
,
1617 const struct flow
*headers
, bool is_arp
)
1619 uint32_t dp_key
= ntohll(md
->metadata
);
1620 uint32_t port_key
= md
->regs
[MFF_LOG_INPORT
- MFF_REG0
];
1621 char ip_s
[INET6_ADDRSTRLEN
];
1624 ovs_be32 ip
= htonl(md
->regs
[0]);
1625 inet_ntop(AF_INET
, &ip
, ip_s
, sizeof(ip_s
));
1627 ovs_be128 ip6
= hton128(flow_get_xxreg(md
, 0));
1628 inet_ntop(AF_INET6
, &ip6
, ip_s
, sizeof(ip_s
));
1630 uint32_t hash
= hash_string(ip_s
, hash_2words(dp_key
, port_key
));
1631 struct put_mac_binding
*pmb
1632 = pinctrl_find_put_mac_binding(dp_key
, port_key
, ip_s
, hash
);
1634 if (hmap_count(&put_mac_bindings
) >= 1000) {
1635 COVERAGE_INC(pinctrl_drop_put_mac_binding
);
1639 pmb
= xmalloc(sizeof *pmb
);
1640 hmap_insert(&put_mac_bindings
, &pmb
->hmap_node
, hash
);
1641 pmb
->dp_key
= dp_key
;
1642 pmb
->port_key
= port_key
;
1643 ovs_strlcpy_arrays(pmb
->ip_s
, ip_s
);
1645 pmb
->timestamp
= time_msec();
1646 pmb
->mac
= headers
->dl_src
;
1650 run_put_mac_binding(struct controller_ctx
*ctx
,
1651 const struct put_mac_binding
*pmb
)
1653 if (time_msec() > pmb
->timestamp
+ 1000) {
1657 /* Convert logical datapath and logical port key into lport. */
1658 const struct sbrec_port_binding
*pb
1659 = lport_lookup_by_key(ctx
->ovnsb_idl
, pmb
->dp_key
, pmb
->port_key
);
1661 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1663 VLOG_WARN_RL(&rl
, "unknown logical port with datapath %"PRIu32
" "
1664 "and port %"PRIu32
, pmb
->dp_key
, pmb
->port_key
);
1668 /* Convert ethernet argument to string form for database. */
1669 char mac_string
[ETH_ADDR_STRLEN
+ 1];
1670 snprintf(mac_string
, sizeof mac_string
,
1671 ETH_ADDR_FMT
, ETH_ADDR_ARGS(pmb
->mac
));
1673 /* Check for an update an existing IP-MAC binding for this logical
1676 * XXX This is not very efficient. */
1677 const struct sbrec_mac_binding
*b
;
1678 SBREC_MAC_BINDING_FOR_EACH (b
, ctx
->ovnsb_idl
) {
1679 if (!strcmp(b
->logical_port
, pb
->logical_port
)
1680 && !strcmp(b
->ip
, pmb
->ip_s
)) {
1681 if (strcmp(b
->mac
, mac_string
)) {
1682 sbrec_mac_binding_set_mac(b
, mac_string
);
1688 /* Add new IP-MAC binding for this logical port. */
1689 b
= sbrec_mac_binding_insert(ctx
->ovnsb_idl_txn
);
1690 sbrec_mac_binding_set_logical_port(b
, pb
->logical_port
);
1691 sbrec_mac_binding_set_ip(b
, pmb
->ip_s
);
1692 sbrec_mac_binding_set_mac(b
, mac_string
);
1693 sbrec_mac_binding_set_datapath(b
, pb
->datapath
);
1697 run_put_mac_bindings(struct controller_ctx
*ctx
)
1699 if (!ctx
->ovnsb_idl_txn
) {
1703 const struct put_mac_binding
*pmb
;
1704 HMAP_FOR_EACH (pmb
, hmap_node
, &put_mac_bindings
) {
1705 run_put_mac_binding(ctx
, pmb
);
1707 flush_put_mac_bindings();
1711 wait_put_mac_bindings(struct controller_ctx
*ctx
)
1713 if (ctx
->ovnsb_idl_txn
&& !hmap_is_empty(&put_mac_bindings
)) {
1714 poll_immediate_wake();
1719 flush_put_mac_bindings(void)
1721 struct put_mac_binding
*pmb
;
1722 HMAP_FOR_EACH_POP (pmb
, hmap_node
, &put_mac_bindings
) {
/*
 * Send gratuitous ARP for vif on localnet.
 *
 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
 * the port's mac,ip mapping.  On localnet, such announcements are needed for
 * switches and routers on the broadcast segment to update their port-mac
 * and ARP tables.
 */
1736 struct eth_addr ea
; /* Ethernet address of port. */
1737 ovs_be32 ipv4
; /* Ipv4 address of port. */
1738 long long int announce_time
; /* Next announcement in ms. */
1739 int backoff
; /* Backoff for the next announcement. */
1740 ofp_port_t ofport
; /* ofport used to output this GARP. */
1741 int tag
; /* VLAN tag of this GARP packet, or -1. */
1744 /* Contains GARPs to be sent. */
1745 static struct shash send_garp_data
;
1747 /* Next GARP announcement in ms. */
1748 static long long int send_garp_time
;
1751 init_send_garps(void)
1753 shash_init(&send_garp_data
);
1754 send_garp_time
= LLONG_MAX
;
1758 destroy_send_garps(void)
1760 shash_destroy_free_data(&send_garp_data
);
1764 add_garp(const char *name
, ofp_port_t ofport
, int tag
,
1765 const struct eth_addr ea
, ovs_be32 ip
)
1767 struct garp_data
*garp
= xmalloc(sizeof *garp
);
1770 garp
->announce_time
= time_msec() + 1000;
1772 garp
->ofport
= ofport
;
1774 shash_add(&send_garp_data
, name
, garp
);
1777 /* Add or update a vif for which GARPs need to be announced. */
1779 send_garp_update(const struct sbrec_port_binding
*binding_rec
,
1780 struct simap
*localnet_ofports
, struct hmap
*local_datapaths
,
1781 struct shash
*nat_addresses
)
1783 /* Find the localnet ofport to send this GARP. */
1784 struct local_datapath
*ld
1785 = get_local_datapath(local_datapaths
,
1786 binding_rec
->datapath
->tunnel_key
);
1787 if (!ld
|| !ld
->localnet_port
) {
1790 ofp_port_t ofport
= u16_to_ofp(simap_get(localnet_ofports
,
1791 ld
->localnet_port
->logical_port
));
1792 int tag
= ld
->localnet_port
->n_tag
? *ld
->localnet_port
->tag
: -1;
1794 volatile struct garp_data
*garp
= NULL
;
1795 /* Update GARP for NAT IP if it exists. Consider port bindings with type
1796 * "l3gateway" for logical switch ports attached to gateway routers, and
1797 * port bindings with type "patch" for logical switch ports attached to
1798 * distributed gateway ports. */
1799 if (!strcmp(binding_rec
->type
, "l3gateway")
1800 || !strcmp(binding_rec
->type
, "patch")) {
1801 struct lport_addresses
*laddrs
= NULL
;
1802 while ((laddrs
= shash_find_and_delete(nat_addresses
,
1803 binding_rec
->logical_port
))) {
1805 for (i
= 0; i
< laddrs
->n_ipv4_addrs
; i
++) {
1806 char *name
= xasprintf("%s-%s", binding_rec
->logical_port
,
1807 laddrs
->ipv4_addrs
[i
].addr_s
);
1808 garp
= shash_find_data(&send_garp_data
, name
);
1810 garp
->ofport
= ofport
;
1813 add_garp(name
, ofport
, tag
, laddrs
->ea
,
1814 laddrs
->ipv4_addrs
[i
].addr
);
1818 destroy_lport_addresses(laddrs
);
1824 /* Update GARP for vif if it exists. */
1825 garp
= shash_find_data(&send_garp_data
, binding_rec
->logical_port
);
1827 garp
->ofport
= ofport
;
1831 /* Add GARP for new vif. */
1833 for (i
= 0; i
< binding_rec
->n_mac
; i
++) {
1834 struct lport_addresses laddrs
;
1835 if (!extract_lsp_addresses(binding_rec
->mac
[i
], &laddrs
)
1836 || !laddrs
.n_ipv4_addrs
) {
1840 add_garp(binding_rec
->logical_port
, ofport
, tag
,
1841 laddrs
.ea
, laddrs
.ipv4_addrs
[0].addr
);
1843 destroy_lport_addresses(&laddrs
);
1848 /* Remove a vif from GARP announcements. */
1850 send_garp_delete(const char *lport
)
1852 struct garp_data
*garp
= shash_find_and_delete(&send_garp_data
, lport
);
1856 static long long int
1857 send_garp(struct garp_data
*garp
, long long int current_time
)
1859 if (current_time
< garp
->announce_time
) {
1860 return garp
->announce_time
;
1863 /* Compose a GARP request packet. */
1864 uint64_t packet_stub
[128 / 8];
1865 struct dp_packet packet
;
1866 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
1867 compose_arp(&packet
, ARP_OP_REQUEST
, garp
->ea
, eth_addr_zero
,
1868 true, garp
->ipv4
, garp
->ipv4
);
1870 /* Compose a GARP request packet's vlan if exist. */
1871 if (garp
->tag
>= 0) {
1872 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN
), htons(garp
->tag
));
1875 /* Compose actions. The garp request is output on localnet ofport. */
1876 uint64_t ofpacts_stub
[4096 / 8];
1877 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
1878 enum ofp_version version
= rconn_get_version(swconn
);
1879 ofpact_put_OUTPUT(&ofpacts
)->port
= garp
->ofport
;
1881 struct ofputil_packet_out po
= {
1882 .packet
= dp_packet_data(&packet
),
1883 .packet_len
= dp_packet_size(&packet
),
1884 .buffer_id
= UINT32_MAX
,
1885 .ofpacts
= ofpacts
.data
,
1886 .ofpacts_len
= ofpacts
.size
,
1888 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
1889 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
1890 queue_msg(ofputil_encode_packet_out(&po
, proto
));
1891 dp_packet_uninit(&packet
);
1892 ofpbuf_uninit(&ofpacts
);
1894 /* Set the next announcement. At most 5 announcements are sent for a
1896 if (garp
->backoff
< 16) {
1898 garp
->announce_time
= current_time
+ garp
->backoff
* 1000;
1900 garp
->announce_time
= LLONG_MAX
;
1902 return garp
->announce_time
;
1905 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
1907 get_localnet_vifs_l3gwports(struct controller_ctx
*ctx
,
1908 const struct ovsrec_bridge
*br_int
,
1909 const struct sbrec_chassis
*chassis
,
1910 struct hmap
*local_datapaths
,
1911 struct sset
*localnet_vifs
,
1912 struct simap
*localnet_ofports
,
1913 struct sset
*local_l3gw_ports
)
1915 for (int i
= 0; i
< br_int
->n_ports
; i
++) {
1916 const struct ovsrec_port
*port_rec
= br_int
->ports
[i
];
1917 if (!strcmp(port_rec
->name
, br_int
->name
)) {
1920 const char *chassis_id
= smap_get(&port_rec
->external_ids
,
1922 if (chassis_id
&& !strcmp(chassis_id
, chassis
->name
)) {
1925 const char *localnet
= smap_get(&port_rec
->external_ids
,
1926 "ovn-localnet-port");
1927 for (int j
= 0; j
< port_rec
->n_interfaces
; j
++) {
1928 const struct ovsrec_interface
*iface_rec
= port_rec
->interfaces
[j
];
1929 if (!iface_rec
->n_ofport
) {
1932 /* Get localnet port with its ofport. */
1934 int64_t ofport
= iface_rec
->ofport
[0];
1935 if (ofport
< 1 || ofport
> ofp_to_u16(OFPP_MAX
)) {
1938 simap_put(localnet_ofports
, localnet
, ofport
);
1941 /* Get localnet vif. */
1942 const char *iface_id
= smap_get(&iface_rec
->external_ids
,
1947 const struct sbrec_port_binding
*pb
1948 = lport_lookup_by_name(ctx
->ovnsb_idl
, iface_id
);
1952 struct local_datapath
*ld
1953 = get_local_datapath(local_datapaths
,
1954 pb
->datapath
->tunnel_key
);
1955 if (ld
&& ld
->localnet_port
) {
1956 sset_add(localnet_vifs
, iface_id
);
1961 const struct local_datapath
*ld
;
1962 struct ovsdb_idl_index_cursor cursor
;
1963 struct sbrec_port_binding
*lpval
;
1964 lpval
= sbrec_port_binding_index_init_row(ctx
->ovnsb_idl
,
1965 &sbrec_table_port_binding
);
1966 ovsdb_idl_initialize_cursor(ctx
->ovnsb_idl
, &sbrec_table_port_binding
,
1967 "lport-by-datapath", &cursor
);
1968 HMAP_FOR_EACH (ld
, hmap_node
, local_datapaths
) {
1969 const struct sbrec_port_binding
*pb
;
1971 if (!ld
->localnet_port
) {
1975 /* Get l3gw ports. Consider port bindings with type "l3gateway"
1976 * that connect to gateway routers (if local), and consider port
1977 * bindings of type "patch" since they might connect to
1978 * distributed gateway ports with NAT addresses. */
1980 sbrec_port_binding_index_set_datapath(lpval
, ld
->datapath
);
1982 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, &cursor
, lpval
) {
1983 if ((ld
->has_local_l3gateway
&& !strcmp(pb
->type
, "l3gateway"))
1984 || !strcmp(pb
->type
, "patch")) {
1985 sset_add(local_l3gw_ports
, pb
->logical_port
);
1989 sbrec_port_binding_index_destroy_row(lpval
);
1993 pinctrl_is_chassis_resident(struct controller_ctx
*ctx
,
1994 const struct sbrec_chassis
*chassis
,
1995 const struct chassis_index
*chassis_index
,
1996 struct sset
*active_tunnels
,
1997 const char *port_name
)
1999 const struct sbrec_port_binding
*pb
2000 = lport_lookup_by_name(ctx
->ovnsb_idl
, port_name
);
2001 if (!pb
|| !pb
->chassis
) {
2004 if (strcmp(pb
->type
, "chassisredirect")) {
2005 return pb
->chassis
== chassis
;
2007 struct ovs_list
*gateway_chassis
=
2008 gateway_chassis_get_ordered(pb
, chassis_index
);
2009 bool active
= gateway_chassis_is_active(gateway_chassis
,
2012 gateway_chassis_destroy(gateway_chassis
);
2017 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2018 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2019 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2020 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2021 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2023 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2025 * The caller must call destroy_lport_addresses() and free(*lport). */
2027 extract_addresses_with_port(const char *addresses
,
2028 struct lport_addresses
*laddrs
,
2032 if (!extract_addresses(addresses
, laddrs
, &ofs
)) {
2034 } else if (ofs
>= strlen(addresses
)) {
2039 lexer_init(&lexer
, addresses
+ ofs
);
2042 if (lexer
.error
|| lexer
.token
.type
!= LEX_T_ID
2043 || !lexer_match_id(&lexer
, "is_chassis_resident")) {
2044 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2045 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in address", addresses
);
2046 lexer_destroy(&lexer
);
2050 if (!lexer_match(&lexer
, LEX_T_LPAREN
)) {
2051 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2052 VLOG_INFO_RL(&rl
, "Syntax error: expecting '(' after "
2053 "'is_chassis_resident' in address '%s'", addresses
);
2054 lexer_destroy(&lexer
);
2058 if (lexer
.token
.type
!= LEX_T_STRING
) {
2059 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2061 "Syntax error: expecting quoted string after"
2062 " 'is_chassis_resident' in address '%s'", addresses
);
2063 lexer_destroy(&lexer
);
2067 *lport
= xstrdup(lexer
.token
.s
);
2070 if (!lexer_match(&lexer
, LEX_T_RPAREN
)) {
2071 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2072 VLOG_INFO_RL(&rl
, "Syntax error: expecting ')' after quoted string in "
2073 "'is_chassis_resident()' in address '%s'",
2075 lexer_destroy(&lexer
);
2079 lexer_destroy(&lexer
);
2084 consider_nat_address(struct controller_ctx
*ctx
,
2085 const char *nat_address
,
2086 const struct sbrec_port_binding
*pb
,
2087 struct sset
*nat_address_keys
,
2088 const struct sbrec_chassis
*chassis
,
2089 const struct chassis_index
*chassis_index
,
2090 struct sset
*active_tunnels
,
2091 struct shash
*nat_addresses
)
2093 struct lport_addresses
*laddrs
= xmalloc(sizeof *laddrs
);
2095 if (!extract_addresses_with_port(nat_address
, laddrs
, &lport
)
2096 || (!lport
&& !strcmp(pb
->type
, "patch"))
2097 || (lport
&& !pinctrl_is_chassis_resident(
2098 ctx
, chassis
, chassis_index
, active_tunnels
, lport
))) {
2099 destroy_lport_addresses(laddrs
);
2107 for (i
= 0; i
< laddrs
->n_ipv4_addrs
; i
++) {
2108 char *name
= xasprintf("%s-%s", pb
->logical_port
,
2109 laddrs
->ipv4_addrs
[i
].addr_s
);
2110 sset_add(nat_address_keys
, name
);
2113 shash_add(nat_addresses
, pb
->logical_port
, laddrs
);
2117 get_nat_addresses_and_keys(struct controller_ctx
*ctx
,
2118 struct sset
*nat_address_keys
,
2119 struct sset
*local_l3gw_ports
,
2120 const struct sbrec_chassis
*chassis
,
2121 const struct chassis_index
*chassis_index
,
2122 struct sset
*active_tunnels
,
2123 struct shash
*nat_addresses
)
2125 const char *gw_port
;
2126 SSET_FOR_EACH(gw_port
, local_l3gw_ports
) {
2127 const struct sbrec_port_binding
*pb
;
2129 pb
= lport_lookup_by_name(ctx
->ovnsb_idl
, gw_port
);
2134 if (pb
->n_nat_addresses
) {
2135 for (int i
= 0; i
< pb
->n_nat_addresses
; i
++) {
2136 consider_nat_address(ctx
, pb
->nat_addresses
[i
], pb
,
2137 nat_address_keys
, chassis
,
2138 chassis_index
, active_tunnels
,
2142 /* Continue to support options:nat-addresses for version
2144 const char *nat_addresses_options
= smap_get(&pb
->options
,
2146 if (nat_addresses_options
) {
2147 consider_nat_address(ctx
, nat_addresses_options
, pb
,
2148 nat_address_keys
, chassis
,
2149 chassis_index
, active_tunnels
,
2157 send_garp_wait(void)
2159 poll_timer_wait_until(send_garp_time
);
2163 send_garp_run(struct controller_ctx
*ctx
,
2164 const struct ovsrec_bridge
*br_int
,
2165 const struct sbrec_chassis
*chassis
,
2166 const struct chassis_index
*chassis_index
,
2167 struct hmap
*local_datapaths
,
2168 struct sset
*active_tunnels
)
2170 struct sset localnet_vifs
= SSET_INITIALIZER(&localnet_vifs
);
2171 struct sset local_l3gw_ports
= SSET_INITIALIZER(&local_l3gw_ports
);
2172 struct sset nat_ip_keys
= SSET_INITIALIZER(&nat_ip_keys
);
2173 struct simap localnet_ofports
= SIMAP_INITIALIZER(&localnet_ofports
);
2174 struct shash nat_addresses
;
2176 shash_init(&nat_addresses
);
2178 get_localnet_vifs_l3gwports(ctx
, br_int
, chassis
, local_datapaths
,
2179 &localnet_vifs
, &localnet_ofports
, &local_l3gw_ports
);
2181 get_nat_addresses_and_keys(ctx
, &nat_ip_keys
, &local_l3gw_ports
,
2182 chassis
, chassis_index
, active_tunnels
,
2184 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2185 struct shash_node
*iter
, *next
;
2186 SHASH_FOR_EACH_SAFE (iter
, next
, &send_garp_data
) {
2187 if (!sset_contains(&localnet_vifs
, iter
->name
) &&
2188 !sset_contains(&nat_ip_keys
, iter
->name
)) {
2189 send_garp_delete(iter
->name
);
2193 /* Update send_garp_data. */
2194 const char *iface_id
;
2195 SSET_FOR_EACH (iface_id
, &localnet_vifs
) {
2196 const struct sbrec_port_binding
*pb
;
2198 pb
= lport_lookup_by_name(ctx
->ovnsb_idl
, iface_id
);
2200 send_garp_update(pb
, &localnet_ofports
, local_datapaths
,
2205 /* Update send_garp_data for nat-addresses. */
2206 const char *gw_port
;
2207 SSET_FOR_EACH (gw_port
, &local_l3gw_ports
) {
2208 const struct sbrec_port_binding
*pb
;
2210 pb
= lport_lookup_by_name(ctx
->ovnsb_idl
, gw_port
);
2212 send_garp_update(pb
, &localnet_ofports
, local_datapaths
,
2217 /* Send GARPs, and update the next announcement. */
2218 long long int current_time
= time_msec();
2219 send_garp_time
= LLONG_MAX
;
2220 SHASH_FOR_EACH (iter
, &send_garp_data
) {
2221 long long int next_announce
= send_garp(iter
->data
, current_time
);
2222 if (send_garp_time
> next_announce
) {
2223 send_garp_time
= next_announce
;
2226 sset_destroy(&localnet_vifs
);
2227 sset_destroy(&local_l3gw_ports
);
2228 simap_destroy(&localnet_ofports
);
2230 SHASH_FOR_EACH_SAFE (iter
, next
, &nat_addresses
) {
2231 struct lport_addresses
*laddrs
= iter
->data
;
2232 destroy_lport_addresses(laddrs
);
2233 shash_delete(&nat_addresses
, iter
);
2236 shash_destroy(&nat_addresses
);
2238 sset_destroy(&nat_ip_keys
);
2242 reload_metadata(struct ofpbuf
*ofpacts
, const struct match
*md
)
2244 enum mf_field_id md_fields
[] = {
2245 #if FLOW_N_REGS == 16
2267 for (size_t i
= 0; i
< ARRAY_SIZE(md_fields
); i
++) {
2268 const struct mf_field
*field
= mf_from_id(md_fields
[i
]);
2269 if (!mf_is_all_wild(field
, &md
->wc
)) {
2270 union mf_value value
;
2271 mf_get_value(field
, &md
->flow
, &value
);
2272 ofpact_put_set_field(ofpacts
, field
, &value
, NULL
);
2278 pinctrl_handle_nd_na(const struct flow
*ip_flow
, const struct match
*md
,
2279 struct ofpbuf
*userdata
)
2281 /* This action only works for IPv6 ND packets, and the switch should only
2282 * send us ND packets this way, but check here just to be sure. */
2283 if (!is_nd(ip_flow
, NULL
)) {
2284 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2285 VLOG_WARN_RL(&rl
, "NA action on non-ND packet");
2289 uint64_t packet_stub
[128 / 8];
2290 struct dp_packet packet
;
2291 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2293 /* xxx These flags are not exactly correct. Look at section 7.2.4
2294 * xxx of RFC 4861. For example, we need to set ND_RSO_ROUTER for
2295 * xxx router's interfaces and ND_RSO_SOLICITED only if it was
2297 compose_nd_na(&packet
, ip_flow
->dl_dst
, ip_flow
->dl_src
,
2298 &ip_flow
->nd_target
, &ip_flow
->ipv6_src
,
2299 htonl(ND_RSO_SOLICITED
| ND_RSO_OVERRIDE
));
2301 /* Reload previous packet metadata and set actions from userdata. */
2302 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
2303 dp_packet_uninit(&packet
);
2307 pinctrl_handle_nd_ns(const struct flow
*ip_flow
, const struct match
*md
,
2308 struct ofpbuf
*userdata
)
2310 /* This action only works for IPv6 packets. */
2311 if (get_dl_type(ip_flow
) != htons(ETH_TYPE_IPV6
)) {
2312 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2313 VLOG_WARN_RL(&rl
, "NS action on non-IPv6 packet");
2317 uint64_t packet_stub
[128 / 8];
2318 struct dp_packet packet
;
2319 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2321 compose_nd_ns(&packet
, ip_flow
->dl_src
, &ip_flow
->ipv6_src
,
2322 &ip_flow
->ipv6_dst
);
2324 /* Reload previous packet metadata and set actions from userdata. */
2325 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
2326 dp_packet_uninit(&packet
);
2330 pinctrl_handle_put_nd_ra_opts(
2331 const struct flow
*in_flow
, struct dp_packet
*pkt_in
,
2332 struct ofputil_packet_in
*pin
, struct ofpbuf
*userdata
,
2333 struct ofpbuf
*continuation
)
2335 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2336 enum ofp_version version
= rconn_get_version(swconn
);
2337 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
2338 struct dp_packet
*pkt_out_ptr
= NULL
;
2339 uint32_t success
= 0;
2341 /* Parse result field. */
2342 const struct mf_field
*f
;
2343 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
2345 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
2349 /* Parse result offset. */
2350 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
2352 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
2356 /* Check that the result is valid and writable. */
2357 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
2358 ofperr
= mf_check_dst(&dst
, NULL
);
2360 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
2364 if (!userdata
->size
) {
2365 VLOG_WARN_RL(&rl
, "IPv6 ND RA options not present in the userdata");
2369 if (!is_icmpv6(in_flow
, NULL
) || in_flow
->tp_dst
!= htons(0) ||
2370 in_flow
->tp_src
!= htons(ND_ROUTER_SOLICIT
)) {
2371 VLOG_WARN_RL(&rl
, "put_nd_ra action on invalid or unsupported packet");
2375 size_t new_packet_size
= pkt_in
->l4_ofs
+ userdata
->size
;
2376 struct dp_packet pkt_out
;
2377 dp_packet_init(&pkt_out
, new_packet_size
);
2378 dp_packet_clear(&pkt_out
);
2379 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
2380 pkt_out_ptr
= &pkt_out
;
2382 /* Copy L2 and L3 headers from pkt_in. */
2383 dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
),
2386 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
2387 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
2388 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
2389 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
2391 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2392 dp_packet_put(&pkt_out
, userdata
->data
, userdata
->size
);
2394 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2395 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(&pkt_out
);
2396 nh
->ip6_plen
= htons(userdata
->size
);
2397 struct ovs_ra_msg
*ra
= dp_packet_l4(&pkt_out
);
2398 ra
->icmph
.icmp6_cksum
= 0;
2399 uint32_t icmp_csum
= packet_csum_pseudoheader6(nh
);
2400 ra
->icmph
.icmp6_cksum
= csum_finish(csum_continue(
2401 icmp_csum
, ra
, userdata
->size
));
2402 pin
->packet
= dp_packet_data(&pkt_out
);
2403 pin
->packet_len
= dp_packet_size(&pkt_out
);
2408 union mf_subvalue sv
;
2409 sv
.u8_val
= success
;
2410 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
2412 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
2413 dp_packet_uninit(pkt_out_ptr
);