1 /* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
2 * Copyright (c) 2017 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include "dp-packet.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
52 #include "vswitch-idl.h"
VLOG_DEFINE_THIS_MODULE(pinctrl);

/* OpenFlow connection to the switch. */
static struct rconn *swconn;

/* Connection sequence number last observed for 'swconn'.  Whenever this
 * value differs from rconn_get_connection_seqno(swconn), 'swconn' has
 * reconnected since the value was recorded. */
static unsigned int conn_seq_no;
64 static void pinctrl_handle_put_mac_binding(const struct flow
*md
,
65 const struct flow
*headers
,
/* File-local helpers, declared ahead of their first use. */

/* put_mac_binding state. */
static void init_put_mac_bindings(void);
static void destroy_put_mac_bindings(void);
static void run_put_mac_bindings(struct controller_ctx *);
static void wait_put_mac_bindings(struct controller_ctx *);
static void flush_put_mac_bindings(void);

/* GARP sending. */
static void init_send_garps(void);
static void destroy_send_garps(void);
static void send_garp_wait(void);
static void send_garp_run(struct controller_ctx *ctx,
                          const struct ovsrec_bridge *,
                          const struct sbrec_chassis *,
                          const struct chassis_index *chassis_index,
                          struct hmap *local_datapaths,
                          struct sset *active_tunnels);

/* Packet-in handlers and their shared metadata helper. */
static void pinctrl_handle_nd_na(const struct flow *ip_flow,
                                 const struct match *md,
                                 struct ofpbuf *userdata);
static void reload_metadata(struct ofpbuf *ofpacts,
                            const struct match *md);
static void pinctrl_handle_put_nd_ra_opts(
    const struct flow *ip_flow, struct dp_packet *pkt_in,
    struct ofputil_packet_in *pin, struct ofpbuf *userdata,
    struct ofpbuf *continuation);
static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
                                 const struct match *md,
                                 struct ofpbuf *userdata);

/* IPv6 RA sending. */
static void init_ipv6_ras(void);
static void destroy_ipv6_ras(void);
static void ipv6_ra_wait(void);
static void send_ipv6_ras(const struct controller_ctx *,
                          struct hmap *local_datapaths);

COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
105 swconn
= rconn_create(5, 0, DSCP_DEFAULT
, 1 << OFP13_VERSION
);
107 init_put_mac_bindings();
113 queue_msg(struct ofpbuf
*msg
)
115 const struct ofp_header
*oh
= msg
->data
;
116 ovs_be32 xid
= oh
->xid
;
118 rconn_send(swconn
, msg
, NULL
);
122 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
126 /* Fetch the switch configuration. The response later will allow us to
127 * change the miss_send_len to UINT16_MAX, so that we can enable
128 * asynchronous messages. */
129 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST
,
130 rconn_get_version(swconn
), 0));
132 /* Set a packet-in format that supports userdata. */
133 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn
),
134 OFPUTIL_PACKET_IN_NXT2
));
138 set_switch_config(struct rconn
*swconn_
,
139 const struct ofputil_switch_config
*config
)
141 enum ofp_version version
= rconn_get_version(swconn_
);
142 struct ofpbuf
*request
= ofputil_encode_set_config(config
, version
);
147 set_actions_and_enqueue_msg(const struct dp_packet
*packet
,
148 const struct match
*md
,
149 struct ofpbuf
*userdata
)
151 /* Copy metadata from 'md' into the packet-out via "set_field"
152 * actions, then add actions from 'userdata'.
154 uint64_t ofpacts_stub
[4096 / 8];
155 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
156 enum ofp_version version
= rconn_get_version(swconn
);
158 reload_metadata(&ofpacts
, md
);
159 enum ofperr error
= ofpacts_pull_openflow_actions(userdata
, userdata
->size
,
163 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
164 VLOG_WARN_RL(&rl
, "failed to parse actions from userdata (%s)",
165 ofperr_to_string(error
));
166 ofpbuf_uninit(&ofpacts
);
170 struct ofputil_packet_out po
= {
171 .packet
= dp_packet_data(packet
),
172 .packet_len
= dp_packet_size(packet
),
173 .buffer_id
= UINT32_MAX
,
174 .ofpacts
= ofpacts
.data
,
175 .ofpacts_len
= ofpacts
.size
,
177 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
178 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
179 queue_msg(ofputil_encode_packet_out(&po
, proto
));
180 ofpbuf_uninit(&ofpacts
);
184 pinctrl_handle_arp(const struct flow
*ip_flow
, const struct match
*md
,
185 struct ofpbuf
*userdata
)
187 /* This action only works for IP packets, and the switch should only send
188 * us IP packets this way, but check here just to be sure. */
189 if (ip_flow
->dl_type
!= htons(ETH_TYPE_IP
)) {
190 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
191 VLOG_WARN_RL(&rl
, "ARP action on non-IP packet (Ethertype %"PRIx16
")",
192 ntohs(ip_flow
->dl_type
));
196 /* Compose an ARP packet. */
197 uint64_t packet_stub
[128 / 8];
198 struct dp_packet packet
;
199 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
200 compose_arp__(&packet
);
202 struct eth_header
*eth
= dp_packet_eth(&packet
);
203 eth
->eth_dst
= ip_flow
->dl_dst
;
204 eth
->eth_src
= ip_flow
->dl_src
;
206 struct arp_eth_header
*arp
= dp_packet_l3(&packet
);
207 arp
->ar_op
= htons(ARP_OP_REQUEST
);
208 arp
->ar_sha
= ip_flow
->dl_src
;
209 put_16aligned_be32(&arp
->ar_spa
, ip_flow
->nw_src
);
210 arp
->ar_tha
= eth_addr_zero
;
211 put_16aligned_be32(&arp
->ar_tpa
, ip_flow
->nw_dst
);
213 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
214 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
215 ip_flow
->vlans
[0].tci
);
218 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
219 dp_packet_uninit(&packet
);
223 pinctrl_handle_icmp(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
224 const struct match
*md
, struct ofpbuf
*userdata
)
226 /* This action only works for IP packets, and the switch should only send
227 * us IP packets this way, but check here just to be sure. */
228 if (ip_flow
->dl_type
!= htons(ETH_TYPE_IP
) &&
229 ip_flow
->dl_type
!= htons(ETH_TYPE_IPV6
)) {
230 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
232 "ICMP action on non-IP packet (eth_type 0x%"PRIx16
")",
233 ntohs(ip_flow
->dl_type
));
237 uint64_t packet_stub
[128 / 8];
238 struct dp_packet packet
;
240 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
241 dp_packet_clear(&packet
);
242 packet
.packet_type
= htonl(PT_ETH
);
244 struct eth_header
*eh
= dp_packet_put_zeros(&packet
, sizeof *eh
);
245 eh
->eth_dst
= ip_flow
->dl_dst
;
246 eh
->eth_src
= ip_flow
->dl_src
;
248 if (get_dl_type(ip_flow
) == htons(ETH_TYPE_IP
)) {
249 struct ip_header
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
251 eh
->eth_type
= htons(ETH_TYPE_IP
);
252 dp_packet_set_l3(&packet
, nh
);
253 nh
->ip_ihl_ver
= IP_IHL_VER(5, 4);
254 nh
->ip_tot_len
= htons(sizeof(struct ip_header
) +
255 sizeof(struct icmp_header
));
256 nh
->ip_proto
= IPPROTO_ICMP
;
257 nh
->ip_frag_off
= htons(IP_DF
);
258 packet_set_ipv4(&packet
, ip_flow
->nw_src
, ip_flow
->nw_dst
,
259 ip_flow
->nw_tos
, 255);
261 struct icmp_header
*ih
= dp_packet_put_zeros(&packet
, sizeof *ih
);
262 dp_packet_set_l4(&packet
, ih
);
263 packet_set_icmp(&packet
, ICMP4_DST_UNREACH
, 1);
265 struct ip6_hdr
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
266 struct icmp6_error_header
*ih
;
267 uint32_t icmpv6_csum
;
269 eh
->eth_type
= htons(ETH_TYPE_IPV6
);
270 dp_packet_set_l3(&packet
, nh
);
272 nh
->ip6_nxt
= IPPROTO_ICMPV6
;
273 nh
->ip6_plen
= htons(sizeof(*nh
) + ICMP6_ERROR_HEADER_LEN
);
274 packet_set_ipv6(&packet
, &ip_flow
->ipv6_src
, &ip_flow
->ipv6_dst
,
275 ip_flow
->nw_tos
, ip_flow
->ipv6_label
, 255);
277 ih
= dp_packet_put_zeros(&packet
, sizeof *ih
);
278 dp_packet_set_l4(&packet
, ih
);
279 ih
->icmp6_base
.icmp6_type
= ICMP6_DST_UNREACH
;
280 ih
->icmp6_base
.icmp6_code
= 1;
281 ih
->icmp6_base
.icmp6_cksum
= 0;
283 uint8_t *data
= dp_packet_put_zeros(&packet
, sizeof *nh
);
284 memcpy(data
, dp_packet_l3(pkt_in
), sizeof(*nh
));
286 icmpv6_csum
= packet_csum_pseudoheader6(dp_packet_l3(&packet
));
287 ih
->icmp6_base
.icmp6_cksum
= csum_finish(
288 csum_continue(icmpv6_csum
, ih
,
289 sizeof(*nh
) + ICMP6_ERROR_HEADER_LEN
));
292 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
293 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
294 ip_flow
->vlans
[0].tci
);
297 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
298 dp_packet_uninit(&packet
);
302 pinctrl_handle_tcp_reset(const struct flow
*ip_flow
, struct dp_packet
*pkt_in
,
303 const struct match
*md
, struct ofpbuf
*userdata
)
305 /* This action only works for TCP segments, and the switch should only send
306 * us TCP segments this way, but check here just to be sure. */
307 if (ip_flow
->nw_proto
!= IPPROTO_TCP
) {
308 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
309 VLOG_WARN_RL(&rl
, "TCP_RESET action on non-TCP packet");
313 uint64_t packet_stub
[128 / 8];
314 struct dp_packet packet
;
316 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
317 dp_packet_clear(&packet
);
318 packet
.packet_type
= htonl(PT_ETH
);
320 struct eth_header
*eh
= dp_packet_put_zeros(&packet
, sizeof *eh
);
321 eh
->eth_dst
= ip_flow
->dl_dst
;
322 eh
->eth_src
= ip_flow
->dl_src
;
324 if (get_dl_type(ip_flow
) == htons(ETH_TYPE_IPV6
)) {
325 struct ip6_hdr
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
327 eh
->eth_type
= htons(ETH_TYPE_IPV6
);
328 dp_packet_set_l3(&packet
, nh
);
330 nh
->ip6_nxt
= IPPROTO_TCP
;
331 nh
->ip6_plen
= htons(TCP_HEADER_LEN
);
332 packet_set_ipv6(&packet
, &ip_flow
->ipv6_src
, &ip_flow
->ipv6_dst
,
333 ip_flow
->nw_tos
, ip_flow
->ipv6_label
, 255);
335 struct ip_header
*nh
= dp_packet_put_zeros(&packet
, sizeof *nh
);
337 eh
->eth_type
= htons(ETH_TYPE_IP
);
338 dp_packet_set_l3(&packet
, nh
);
339 nh
->ip_ihl_ver
= IP_IHL_VER(5, 4);
340 nh
->ip_tot_len
= htons(IP_HEADER_LEN
+ TCP_HEADER_LEN
);
341 nh
->ip_proto
= IPPROTO_TCP
;
342 nh
->ip_frag_off
= htons(IP_DF
);
343 packet_set_ipv4(&packet
, ip_flow
->nw_src
, ip_flow
->nw_dst
,
344 ip_flow
->nw_tos
, 255);
347 struct tcp_header
*th
= dp_packet_put_zeros(&packet
, sizeof *th
);
348 struct tcp_header
*tcp_in
= dp_packet_l4(pkt_in
);
349 dp_packet_set_l4(&packet
, th
);
350 th
->tcp_ctl
= TCP_CTL(TCP_RST
, 5);
351 if (ip_flow
->tcp_flags
& htons(TCP_ACK
)) {
352 th
->tcp_seq
= tcp_in
->tcp_ack
;
354 uint32_t tcp_seq
, ack_seq
, tcp_len
;
356 tcp_seq
= ntohl(get_16aligned_be32(&tcp_in
->tcp_seq
));
357 tcp_len
= TCP_OFFSET(tcp_in
->tcp_ctl
) * 4;
358 ack_seq
= tcp_seq
+ dp_packet_l4_size(pkt_in
) - tcp_len
;
359 put_16aligned_be32(&th
->tcp_ack
, htonl(ack_seq
));
360 put_16aligned_be32(&th
->tcp_seq
, 0);
362 packet_set_tcp_port(&packet
, ip_flow
->tp_dst
, ip_flow
->tp_src
);
364 if (ip_flow
->vlans
[0].tci
& htons(VLAN_CFI
)) {
365 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN_8021Q
),
366 ip_flow
->vlans
[0].tci
);
369 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
370 dp_packet_uninit(&packet
);
374 pinctrl_handle_put_dhcp_opts(
375 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
376 struct ofpbuf
*userdata
, struct ofpbuf
*continuation
)
378 enum ofp_version version
= rconn_get_version(swconn
);
379 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
380 struct dp_packet
*pkt_out_ptr
= NULL
;
381 uint32_t success
= 0;
383 /* Parse result field. */
384 const struct mf_field
*f
;
385 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
387 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
388 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
392 /* Parse result offset and offer IP. */
393 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
394 ovs_be32
*offer_ip
= ofpbuf_try_pull(userdata
, sizeof *offer_ip
);
395 if (!ofsp
|| !offer_ip
) {
396 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
397 VLOG_WARN_RL(&rl
, "offset or offer_ip not present in the userdata");
401 /* Check that the result is valid and writable. */
402 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
403 ofperr
= mf_check_dst(&dst
, NULL
);
405 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
406 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
410 if (!userdata
->size
) {
411 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
412 VLOG_WARN_RL(&rl
, "DHCP options not present in the userdata");
416 /* Validate the DHCP request packet.
417 * Format of the DHCP packet is
418 * ------------------------------------------------------------------------
419 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
420 * ------------------------------------------------------------------------
422 if (dp_packet_l4_size(pkt_in
) < (UDP_HEADER_LEN
+
423 sizeof (struct dhcp_header
) + sizeof(uint32_t) + 3)) {
424 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
425 VLOG_WARN_RL(&rl
, "Invalid or incomplete DHCP packet recieved");
429 struct dhcp_header
const *in_dhcp_data
= dp_packet_get_udp_payload(pkt_in
);
430 if (in_dhcp_data
->op
!= DHCP_OP_REQUEST
) {
431 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
432 VLOG_WARN_RL(&rl
, "Invalid opcode in the DHCP packet : %d",
437 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
438 * options is the DHCP magic cookie followed by the actual DHCP options.
440 const uint8_t *in_dhcp_opt
=
441 (const uint8_t *)dp_packet_get_udp_payload(pkt_in
) +
442 sizeof (struct dhcp_header
);
444 ovs_be32 magic_cookie
= htonl(DHCP_MAGIC_COOKIE
);
445 if (memcmp(in_dhcp_opt
, &magic_cookie
, sizeof(ovs_be32
))) {
446 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
447 VLOG_WARN_RL(&rl
, "DHCP magic cookie not present in the DHCP packet");
452 /* Check that the DHCP Message Type (opt 53) is present or not with
453 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST as the first
456 if (!(in_dhcp_opt
[0] == DHCP_OPT_MSG_TYPE
&& in_dhcp_opt
[1] == 1 && (
457 in_dhcp_opt
[2] == DHCP_MSG_DISCOVER
||
458 in_dhcp_opt
[2] == DHCP_MSG_REQUEST
))) {
459 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
460 VLOG_WARN_RL(&rl
, "Invalid DHCP message type : opt code = %d,"
461 " opt value = %d", in_dhcp_opt
[0], in_dhcp_opt
[2]);
466 if (in_dhcp_opt
[2] == DHCP_MSG_DISCOVER
) {
467 msg_type
= DHCP_MSG_OFFER
;
469 msg_type
= DHCP_MSG_ACK
;
472 /* Frame the DHCP reply packet
473 * Total DHCP options length will be options stored in the userdata +
476 * --------------------------------------------------------------
477 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
478 * --------------------------------------------------------------
479 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
480 * --------------------------------------------------------------
482 uint16_t new_l4_size
= UDP_HEADER_LEN
+ DHCP_HEADER_LEN
+ \
484 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
486 struct dp_packet pkt_out
;
487 dp_packet_init(&pkt_out
, new_packet_size
);
488 dp_packet_clear(&pkt_out
);
489 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
490 pkt_out_ptr
= &pkt_out
;
492 /* Copy the L2 and L3 headers from pkt_in, since they remain the same. */
494 &pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
), pkt_in
->l4_ofs
);
496 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
497 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
498 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
499 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
501 struct udp_header
*udp
= dp_packet_put(
502 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
504 struct dhcp_header
*dhcp_data
= dp_packet_put(
505 &pkt_out
, dp_packet_pull(pkt_in
, DHCP_HEADER_LEN
), DHCP_HEADER_LEN
);
506 dhcp_data
->op
= DHCP_OP_REPLY
;
507 dhcp_data
->yiaddr
= *offer_ip
;
508 dp_packet_put(&pkt_out
, &magic_cookie
, sizeof(ovs_be32
));
510 uint8_t *out_dhcp_opts
= dp_packet_put_zeros(&pkt_out
,
511 userdata
->size
+ 12);
512 /* DHCP option - type */
513 out_dhcp_opts
[0] = DHCP_OPT_MSG_TYPE
;
514 out_dhcp_opts
[1] = 1;
515 out_dhcp_opts
[2] = msg_type
;
518 memcpy(out_dhcp_opts
, userdata
->data
, userdata
->size
);
519 out_dhcp_opts
+= userdata
->size
;
523 out_dhcp_opts
[0] = DHCP_OPT_END
;
525 udp
->udp_len
= htons(new_l4_size
);
527 struct ip_header
*out_ip
= dp_packet_l3(&pkt_out
);
528 out_ip
->ip_tot_len
= htons(pkt_out
.l4_ofs
- pkt_out
.l3_ofs
+ new_l4_size
);
530 /* Checksum needs to be initialized to zero. */
532 out_ip
->ip_csum
= csum(out_ip
, sizeof *out_ip
);
534 pin
->packet
= dp_packet_data(&pkt_out
);
535 pin
->packet_len
= dp_packet_size(&pkt_out
);
537 /* Log the response. */
538 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(20, 40);
539 const struct eth_header
*l2
= dp_packet_eth(&pkt_out
);
540 VLOG_INFO_RL(&rl
, "DHCP%s "ETH_ADDR_FMT
" "IP_FMT
"",
541 msg_type
== DHCP_MSG_OFFER
? "OFFER" : "ACK",
542 ETH_ADDR_ARGS(l2
->eth_src
), IP_ARGS(*offer_ip
));
547 union mf_subvalue sv
;
549 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
551 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
553 dp_packet_uninit(pkt_out_ptr
);
558 compose_out_dhcpv6_opts(struct ofpbuf
*userdata
,
559 struct ofpbuf
*out_dhcpv6_opts
, ovs_be32 iaid
)
561 while (userdata
->size
) {
562 struct dhcp_opt6_header
*userdata_opt
= ofpbuf_try_pull(
563 userdata
, sizeof *userdata_opt
);
568 size_t size
= ntohs(userdata_opt
->size
);
569 uint8_t *userdata_opt_data
= ofpbuf_try_pull(userdata
, size
);
570 if (!userdata_opt_data
) {
574 switch (ntohs(userdata_opt
->opt_code
)) {
575 case DHCPV6_OPT_SERVER_ID_CODE
:
577 /* The Server Identifier option carries a DUID
578 * identifying a server between a client and a server.
579 * See RFC 3315 Sec 9 and Sec 22.3.
581 * We use DUID Based on Link-layer Address [DUID-LL].
584 struct dhcpv6_opt_server_id
*opt_server_id
= ofpbuf_put_zeros(
585 out_dhcpv6_opts
, sizeof *opt_server_id
);
587 opt_server_id
->opt
.code
= htons(DHCPV6_OPT_SERVER_ID_CODE
);
588 opt_server_id
->opt
.len
= htons(size
+ 4);
589 opt_server_id
->duid_type
= htons(DHCPV6_DUID_LL
);
590 opt_server_id
->hw_type
= htons(DHCPV6_HW_TYPE_ETH
);
591 memcpy(&opt_server_id
->mac
, userdata_opt_data
,
592 sizeof(struct eth_addr
));
596 case DHCPV6_OPT_IA_ADDR_CODE
:
598 if (size
!= sizeof(struct in6_addr
)) {
602 /* IA Address option is used to specify IPv6 addresses associated
603 * with an IA_NA or IA_TA. The IA Address option must be
604 * encapsulated in the Options field of an IA_NA or IA_TA option.
606 * We will encapsulate the IA Address within the IA_NA option.
607 * Please see RFC 3315 section 22.5 and 22.6
609 struct dhcpv6_opt_ia_na
*opt_ia_na
= ofpbuf_put_zeros(
610 out_dhcpv6_opts
, sizeof *opt_ia_na
);
611 opt_ia_na
->opt
.code
= htons(DHCPV6_OPT_IA_NA_CODE
);
612 /* IA_NA length (in bytes)-
616 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
618 opt_ia_na
->opt
.len
= htons(12 + sizeof(struct dhcpv6_opt_ia_addr
));
619 opt_ia_na
->iaid
= iaid
;
620 /* Set the lifetime of the address(es) to infinity */
621 opt_ia_na
->t1
= OVS_BE32_MAX
;
622 opt_ia_na
->t2
= OVS_BE32_MAX
;
624 struct dhcpv6_opt_ia_addr
*opt_ia_addr
= ofpbuf_put_zeros(
625 out_dhcpv6_opts
, sizeof *opt_ia_addr
);
626 opt_ia_addr
->opt
.code
= htons(DHCPV6_OPT_IA_ADDR_CODE
);
627 opt_ia_addr
->opt
.len
= htons(size
+ 8);
628 memcpy(opt_ia_addr
->ipv6
.s6_addr
, userdata_opt_data
, size
);
629 opt_ia_addr
->t1
= OVS_BE32_MAX
;
630 opt_ia_addr
->t2
= OVS_BE32_MAX
;
634 case DHCPV6_OPT_DNS_SERVER_CODE
:
636 struct dhcpv6_opt_header
*opt_dns
= ofpbuf_put_zeros(
637 out_dhcpv6_opts
, sizeof *opt_dns
);
638 opt_dns
->code
= htons(DHCPV6_OPT_DNS_SERVER_CODE
);
639 opt_dns
->len
= htons(size
);
640 ofpbuf_put(out_dhcpv6_opts
, userdata_opt_data
, size
);
644 case DHCPV6_OPT_DOMAIN_SEARCH_CODE
:
646 struct dhcpv6_opt_header
*opt_dsl
= ofpbuf_put_zeros(
647 out_dhcpv6_opts
, sizeof *opt_dsl
);
648 opt_dsl
->code
= htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE
);
649 opt_dsl
->len
= htons(size
+ 2);
650 uint8_t *data
= ofpbuf_put_zeros(out_dhcpv6_opts
, size
+ 2);
652 memcpy(data
+ 1, userdata_opt_data
, size
);
664 pinctrl_handle_put_dhcpv6_opts(
665 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
666 struct ofpbuf
*userdata
, struct ofpbuf
*continuation OVS_UNUSED
)
668 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
669 enum ofp_version version
= rconn_get_version(swconn
);
670 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
671 struct dp_packet
*pkt_out_ptr
= NULL
;
672 uint32_t success
= 0;
674 /* Parse result field. */
675 const struct mf_field
*f
;
676 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
678 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
682 /* Parse result offset. */
683 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
685 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
689 /* Check that the result is valid and writable. */
690 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
691 ofperr
= mf_check_dst(&dst
, NULL
);
693 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
697 if (!userdata
->size
) {
698 VLOG_WARN_RL(&rl
, "DHCPv6 options not present in the userdata");
702 struct udp_header
*in_udp
= dp_packet_l4(pkt_in
);
703 const uint8_t *in_dhcpv6_data
= dp_packet_get_udp_payload(pkt_in
);
704 if (!in_udp
|| !in_dhcpv6_data
) {
705 VLOG_WARN_RL(&rl
, "truncated dhcpv6 packet");
709 uint8_t out_dhcpv6_msg_type
;
710 switch(*in_dhcpv6_data
) {
711 case DHCPV6_MSG_TYPE_SOLICIT
:
712 out_dhcpv6_msg_type
= DHCPV6_MSG_TYPE_ADVT
;
715 case DHCPV6_MSG_TYPE_REQUEST
:
716 case DHCPV6_MSG_TYPE_CONFIRM
:
717 case DHCPV6_MSG_TYPE_DECLINE
:
718 out_dhcpv6_msg_type
= DHCPV6_MSG_TYPE_REPLY
;
722 /* Invalid or unsupported DHCPv6 message type */
726 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes)). */
728 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
729 * solicit/request/confirm packet and copy the same IAID in the Server's
732 struct dhcpv6_opt_header
const *in_opt_client_id
= NULL
;
733 size_t udp_len
= ntohs(in_udp
->udp_len
);
734 size_t l4_len
= dp_packet_l4_size(pkt_in
);
735 uint8_t *end
= (uint8_t *)in_udp
+ MIN(udp_len
, l4_len
);
736 while (in_dhcpv6_data
< end
) {
737 struct dhcpv6_opt_header
const *in_opt
=
738 (struct dhcpv6_opt_header
*)in_dhcpv6_data
;
739 switch(ntohs(in_opt
->code
)) {
740 case DHCPV6_OPT_IA_NA_CODE
:
742 struct dhcpv6_opt_ia_na
*opt_ia_na
= (
743 struct dhcpv6_opt_ia_na
*)in_opt
;
744 iaid
= opt_ia_na
->iaid
;
748 case DHCPV6_OPT_CLIENT_ID_CODE
:
749 in_opt_client_id
= in_opt
;
755 in_dhcpv6_data
+= sizeof *in_opt
+ ntohs(in_opt
->len
);
758 if (!in_opt_client_id
) {
759 VLOG_WARN_RL(&rl
, "DHCPv6 option - Client id not present in the "
765 VLOG_WARN_RL(&rl
, "DHCPv6 option - IA NA not present in the "
770 uint64_t out_ofpacts_dhcpv6_opts_stub
[256 / 8];
771 struct ofpbuf out_dhcpv6_opts
=
772 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub
);
774 if (!compose_out_dhcpv6_opts(userdata
, &out_dhcpv6_opts
, iaid
)) {
775 VLOG_WARN_RL(&rl
, "Invalid userdata");
780 = (UDP_HEADER_LEN
+ 4 + sizeof *in_opt_client_id
+
781 ntohs(in_opt_client_id
->len
) + out_dhcpv6_opts
.size
);
782 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
784 struct dp_packet pkt_out
;
785 dp_packet_init(&pkt_out
, new_packet_size
);
786 dp_packet_clear(&pkt_out
);
787 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
788 pkt_out_ptr
= &pkt_out
;
790 /* Copy L2 and L3 headers from pkt_in. */
791 dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
),
794 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
795 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
796 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
797 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
799 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
800 * Need to preserve the transaction id in the DHCPv6 reply packet. */
801 struct udp_header
*out_udp
= dp_packet_put(
802 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
803 uint8_t *out_dhcpv6
= dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, 4), 4);
805 /* Set the proper DHCPv6 message type. */
806 *out_dhcpv6
= out_dhcpv6_msg_type
;
808 /* Copy the Client Identifier. */
809 dp_packet_put(&pkt_out
, in_opt_client_id
,
810 sizeof *in_opt_client_id
+ ntohs(in_opt_client_id
->len
));
812 /* Copy the DHCPv6 Options. */
813 dp_packet_put(&pkt_out
, out_dhcpv6_opts
.data
, out_dhcpv6_opts
.size
);
814 out_udp
->udp_len
= htons(new_l4_size
);
815 out_udp
->udp_csum
= 0;
817 struct ovs_16aligned_ip6_hdr
*out_ip6
= dp_packet_l3(&pkt_out
);
818 out_ip6
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
= out_udp
->udp_len
;
821 csum
= packet_csum_pseudoheader6(dp_packet_l3(&pkt_out
));
822 csum
= csum_continue(csum
, out_udp
, dp_packet_size(&pkt_out
) -
823 ((const unsigned char *)out_udp
-
824 (const unsigned char *)dp_packet_eth(&pkt_out
)));
825 out_udp
->udp_csum
= csum_finish(csum
);
826 if (!out_udp
->udp_csum
) {
827 out_udp
->udp_csum
= htons(0xffff);
830 pin
->packet
= dp_packet_data(&pkt_out
);
831 pin
->packet_len
= dp_packet_size(&pkt_out
);
832 ofpbuf_uninit(&out_dhcpv6_opts
);
836 union mf_subvalue sv
;
838 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
840 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
841 dp_packet_uninit(pkt_out_ptr
);
845 put_be16(struct ofpbuf
*buf
, ovs_be16 x
)
847 ofpbuf_put(buf
, &x
, sizeof x
);
851 put_be32(struct ofpbuf
*buf
, ovs_be32 x
)
853 ofpbuf_put(buf
, &x
, sizeof x
);
/* Handles a packet-in generated for a DNS lookup.
 *
 * Steps visible in this listing:
 *   - the result field and the result bit offset are pulled from 'userdata'
 *     and checked with mf_check_dst();
 *   - 'pkt_in' is inspected as a DNS message: the UDP payload must exist, the
 *     response bit (0x80 in 'lo_flag') and 'qdcount' are tested, the query
 *     name is rebuilt label by label into a dotted string, and only query
 *     types A, AAAA and ANY are accepted;
 *   - the name is looked up in the southbound DNS rows that list a datapath
 *     whose tunnel key equals the packet-in metadata;
 *   - one answer record is emitted per IPv4 address (A/ANY) and per IPv6
 *     address (AAAA/ANY), each made of the original encoded name, type,
 *     class, DNS_DEFAULT_RR_TTL, the data length and the address;
 *   - a reply is assembled in 'pkt_out' from the copied L2/L3 headers, UDP
 *     header, DNS header (response bit set, 'ancount' filled in), the query
 *     section and the answers; lengths are rewritten, the IP header checksum
 *     is recomputed for ETH_TYPE_IP, and the code that follows computes the
 *     UDP checksum over the IPv6 pseudo-header;
 *   - 'success' is written to the result bit 'dst' in the packet-in metadata
 *     and the packet-in is resumed through queue_msg().
 *
 * NOTE(review): the error exits, the declarations of 'idx' and 'csum' and
 * some loop bookkeeping are not visible in this listing. */
857 pinctrl_handle_dns_lookup(
858 struct dp_packet
*pkt_in
, struct ofputil_packet_in
*pin
,
859 struct ofpbuf
*userdata
, struct ofpbuf
*continuation
,
860 struct controller_ctx
*ctx
)
862 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
863 enum ofp_version version
= rconn_get_version(swconn
);
864 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
865 struct dp_packet
*pkt_out_ptr
= NULL
;
866 uint32_t success
= 0;
868 /* Parse result field. */
869 const struct mf_field
*f
;
870 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
872 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
876 /* Parse result offset. */
877 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
879 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
883 /* Check that the result is valid and writable. */
884 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
885 ofperr
= mf_check_dst(&dst
, NULL
);
887 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
891 /* Extract the DNS header */
892 struct dns_header
const *in_dns_header
= dp_packet_get_udp_payload(pkt_in
);
893 if (!in_dns_header
) {
894 VLOG_WARN_RL(&rl
, "truncated dns packet");
898 /* Check if it is DNS request or not */
899 if (in_dns_header
->lo_flag
& 0x80) {
900 /* It's a DNS response packet which we are not interested in */
904 /* Check if at least one query request is present */
905 if (!in_dns_header
->qdcount
) {
909 struct udp_header
*in_udp
= dp_packet_l4(pkt_in
);
910 size_t udp_len
= ntohs(in_udp
->udp_len
);
911 size_t l4_len
= dp_packet_l4_size(pkt_in
);
912 uint8_t *end
= (uint8_t *)in_udp
+ MIN(udp_len
, l4_len
);
913 uint8_t *in_dns_data
= (uint8_t *)(in_dns_header
+ 1);
914 uint8_t *in_queryname
= in_dns_data
;
916 struct ds query_name
;
917 ds_init(&query_name
);
918 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
919 * encoded as (in hex) - 03 77 77 77 03 6f 76 6e 03 6f 72 67 00.
921 while ((in_dns_data
+ idx
) < end
&& in_dns_data
[idx
]) {
922 uint8_t label_len
= in_dns_data
[idx
++];
923 if (in_dns_data
+ idx
+ label_len
> end
) {
924 ds_destroy(&query_name
);
927 ds_put_buffer(&query_name
, (const char *) in_dns_data
+ idx
, label_len
);
929 ds_put_char(&query_name
, '.');
933 ds_chomp(&query_name
, '.');
936 /* Query should have TYPE and CLASS fields */
937 if (in_dns_data
+ (2 * sizeof(ovs_be16
)) > end
) {
938 ds_destroy(&query_name
);
/* NOTE(review): presumably 'in_dns_data' has been advanced past the
 * variable-length query name by this point, so this 16-bit load may be
 * unaligned -- confirm that is acceptable on all supported targets. */
942 uint16_t query_type
= ntohs(*ALIGNED_CAST(const ovs_be16
*, in_dns_data
));
943 /* Supported query types - A, AAAA and ANY */
944 if (!(query_type
== DNS_QUERY_TYPE_A
|| query_type
== DNS_QUERY_TYPE_AAAA
945 || query_type
== DNS_QUERY_TYPE_ANY
)) {
946 ds_destroy(&query_name
);
950 uint64_t dp_key
= ntohll(pin
->flow_metadata
.flow
.metadata
);
951 const struct sbrec_dns
*sbrec_dns
;
952 const char *answer_ips
= NULL
;
953 SBREC_DNS_FOR_EACH(sbrec_dns
, ctx
->ovnsb_idl
) {
954 for (size_t i
= 0; i
< sbrec_dns
->n_datapaths
; i
++) {
955 if (sbrec_dns
->datapaths
[i
]->tunnel_key
== dp_key
) {
956 answer_ips
= smap_get(&sbrec_dns
->records
,
957 ds_cstr(&query_name
));
969 ds_destroy(&query_name
);
974 struct lport_addresses ip_addrs
;
975 if (!extract_ip_addresses(answer_ips
, &ip_addrs
)) {
979 uint16_t ancount
= 0;
980 uint64_t dns_ans_stub
[128 / 8];
981 struct ofpbuf dns_answer
= OFPBUF_STUB_INITIALIZER(dns_ans_stub
);
983 if (query_type
== DNS_QUERY_TYPE_A
|| query_type
== DNS_QUERY_TYPE_ANY
) {
984 for (size_t i
= 0; i
< ip_addrs
.n_ipv4_addrs
; i
++) {
985 /* Copy the answer section */
986 /* Format of the answer section is
987 * - NAME -> The domain name
988 * - TYPE -> 2 octets containing one of the RR type codes
989 * - CLASS -> 2 octets which specify the class of the data
990 * in the RDATA field.
991 * - TTL -> 32 bit unsigned int specifying the time
992 * interval (in secs) that the resource record
993 * may be cached before it should be discarded.
994 * - RDLENGTH -> 16 bit integer specifying the length of the
996 * - RDATA -> a variable length string of octets that
997 * describes the resource. In our case it will
998 * be IP address of the domain name.
1000 ofpbuf_put(&dns_answer
, in_queryname
, idx
);
1001 put_be16(&dns_answer
, htons(DNS_QUERY_TYPE_A
));
1002 put_be16(&dns_answer
, htons(DNS_CLASS_IN
));
1003 put_be32(&dns_answer
, htonl(DNS_DEFAULT_RR_TTL
));
1004 put_be16(&dns_answer
, htons(sizeof(ovs_be32
)));
1005 put_be32(&dns_answer
, ip_addrs
.ipv4_addrs
[i
].addr
);
1010 if (query_type
== DNS_QUERY_TYPE_AAAA
||
1011 query_type
== DNS_QUERY_TYPE_ANY
) {
1012 for (size_t i
= 0; i
< ip_addrs
.n_ipv6_addrs
; i
++) {
1013 ofpbuf_put(&dns_answer
, in_queryname
, idx
);
1014 put_be16(&dns_answer
, htons(DNS_QUERY_TYPE_AAAA
));
1015 put_be16(&dns_answer
, htons(DNS_CLASS_IN
));
1016 put_be32(&dns_answer
, htonl(DNS_DEFAULT_RR_TTL
));
1017 const struct in6_addr
*ip6
= &ip_addrs
.ipv6_addrs
[i
].addr
;
1018 put_be16(&dns_answer
, htons(sizeof *ip6
));
1019 ofpbuf_put(&dns_answer
, ip6
, sizeof *ip6
);
1024 destroy_lport_addresses(&ip_addrs
);
1027 ofpbuf_uninit(&dns_answer
);
1031 uint16_t new_l4_size
= ntohs(in_udp
->udp_len
) + dns_answer
.size
;
1032 size_t new_packet_size
= pkt_in
->l4_ofs
+ new_l4_size
;
1033 struct dp_packet pkt_out
;
1034 dp_packet_init(&pkt_out
, new_packet_size
);
1035 dp_packet_clear(&pkt_out
);
1036 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
1037 pkt_out_ptr
= &pkt_out
;
1039 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1041 &pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
), pkt_in
->l4_ofs
);
1043 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
1044 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
1045 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
1046 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
1048 struct udp_header
*out_udp
= dp_packet_put(
1049 &pkt_out
, dp_packet_pull(pkt_in
, UDP_HEADER_LEN
), UDP_HEADER_LEN
);
1051 /* Copy the DNS header. */
1052 struct dns_header
*out_dns_header
= dp_packet_put(
1053 &pkt_out
, dp_packet_pull(pkt_in
, sizeof *out_dns_header
),
1054 sizeof *out_dns_header
);
1056 /* Set the response bit to 1 in the flags. */
1057 out_dns_header
->lo_flag
|= 0x80;
1059 /* Set the answer RR. */
1060 out_dns_header
->ancount
= htons(ancount
);
1062 /* Copy the Query section. */
1063 dp_packet_put(&pkt_out
, dp_packet_data(pkt_in
), dp_packet_size(pkt_in
));
1065 /* Copy the answer sections. */
1066 dp_packet_put(&pkt_out
, dns_answer
.data
, dns_answer
.size
);
1067 ofpbuf_uninit(&dns_answer
);
1069 out_udp
->udp_len
= htons(new_l4_size
);
1070 out_udp
->udp_csum
= 0;
1072 struct eth_header
*eth
= dp_packet_data(&pkt_out
);
1073 if (eth
->eth_type
== htons(ETH_TYPE_IP
)) {
1074 struct ip_header
*out_ip
= dp_packet_l3(&pkt_out
);
1075 out_ip
->ip_tot_len
= htons(pkt_out
.l4_ofs
- pkt_out
.l3_ofs
1077 /* Checksum needs to be initialized to zero. */
1078 out_ip
->ip_csum
= 0;
1079 out_ip
->ip_csum
= csum(out_ip
, sizeof *out_ip
);
1081 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(&pkt_out
);
1082 nh
->ip6_plen
= htons(new_l4_size
);
1084 /* IPv6 needs UDP checksum calculated */
1086 csum
= packet_csum_pseudoheader6(nh
);
1087 csum
= csum_continue(csum
, out_udp
, dp_packet_size(&pkt_out
) -
1088 ((const unsigned char *)out_udp
-
1089 (const unsigned char *)eth
));
1090 out_udp
->udp_csum
= csum_finish(csum
);
1091 if (!out_udp
->udp_csum
) {
1092 out_udp
->udp_csum
= htons(0xffff);
1096 pin
->packet
= dp_packet_data(&pkt_out
);
1097 pin
->packet_len
= dp_packet_size(&pkt_out
);
1102 union mf_subvalue sv
;
1103 sv
.u8_val
= success
;
1104 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
1106 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
1107 dp_packet_uninit(pkt_out_ptr
);
/* Decodes the packet-in message 'msg' and dispatches it on the opcode stored
 * in the action header at the front of the packet-in userdata.
 *
 * The packet is wrapped in a dp_packet without copying and its headers are
 * extracted into a flow before dispatch, so handlers receive the parsed
 * headers, the raw packet, the packet-in metadata, the remaining userdata
 * and/or the continuation, depending on the opcode.  Decode errors, a
 * missing action header and unrecognized opcodes produce rate-limited
 * warnings.  Packet-ins whose reason is not OFPR_ACTION are tested for before
 * the userdata is examined (presumably they are ignored; the branch body is
 * not visible in this listing). */
1111 process_packet_in(const struct ofp_header
*msg
, struct controller_ctx
*ctx
)
1113 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1115 struct ofputil_packet_in pin
;
1116 struct ofpbuf continuation
;
1117 enum ofperr error
= ofputil_decode_packet_in(msg
, true, NULL
, NULL
, &pin
,
1118 NULL
, NULL
, &continuation
);
1121 VLOG_WARN_RL(&rl
, "error decoding packet-in: %s",
1122 ofperr_to_string(error
));
1125 if (pin
.reason
!= OFPR_ACTION
) {
1129 struct ofpbuf userdata
= ofpbuf_const_initializer(pin
.userdata
,
1131 const struct action_header
*ah
= ofpbuf_pull(&userdata
, sizeof *ah
);
1133 VLOG_WARN_RL(&rl
, "packet-in userdata lacks action header");
1137 struct dp_packet packet
;
1138 dp_packet_use_const(&packet
, pin
.packet
, pin
.packet_len
);
1139 struct flow headers
;
1140 flow_extract(&packet
, &headers
);
1142 switch (ntohl(ah
->opcode
)) {
1143 case ACTION_OPCODE_ARP
:
1144 pinctrl_handle_arp(&headers
, &pin
.flow_metadata
, &userdata
);
1147 case ACTION_OPCODE_PUT_ARP
:
1148 pinctrl_handle_put_mac_binding(&pin
.flow_metadata
.flow
, &headers
,
1152 case ACTION_OPCODE_PUT_DHCP_OPTS
:
1153 pinctrl_handle_put_dhcp_opts(&packet
, &pin
, &userdata
, &continuation
);
1156 case ACTION_OPCODE_ND_NA
:
1157 pinctrl_handle_nd_na(&headers
, &pin
.flow_metadata
, &userdata
);
1160 case ACTION_OPCODE_PUT_ND
:
1161 pinctrl_handle_put_mac_binding(&pin
.flow_metadata
.flow
, &headers
,
1165 case ACTION_OPCODE_PUT_DHCPV6_OPTS
:
1166 pinctrl_handle_put_dhcpv6_opts(&packet
, &pin
, &userdata
,
1170 case ACTION_OPCODE_DNS_LOOKUP
:
1171 pinctrl_handle_dns_lookup(&packet
, &pin
, &userdata
, &continuation
, ctx
);
1174 case ACTION_OPCODE_LOG
:
1175 handle_acl_log(&headers
, &userdata
);
1178 case ACTION_OPCODE_PUT_ND_RA_OPTS
:
1179 pinctrl_handle_put_nd_ra_opts(&headers
, &packet
, &pin
, &userdata
,
1183 case ACTION_OPCODE_ND_NS
:
1184 pinctrl_handle_nd_ns(&headers
, &pin
.flow_metadata
, &userdata
);
1187 case ACTION_OPCODE_ICMP
:
1188 pinctrl_handle_icmp(&headers
, &packet
, &pin
.flow_metadata
,
1192 case ACTION_OPCODE_TCP_RESET
:
1193 pinctrl_handle_tcp_reset(&headers
, &packet
, &pin
.flow_metadata
,
1198 VLOG_WARN_RL(&rl
, "unrecognized packet-in opcode %"PRIu32
,
/* Handles one OpenFlow message 'oh', already classified as 'type', received
 * on 'swconn':
 *   - OFPTYPE_ECHO_REQUEST: an echo reply is queued;
 *   - OFPTYPE_GET_CONFIG_REPLY: the decoded switch config is sent back with
 *     'miss_send_len' raised to UINT16_MAX;
 *   - OFPTYPE_PACKET_IN: forwarded to process_packet_in();
 *   - when debug logging is enabled, the message is formatted with
 *     ofp_to_string() and logged as ignored (presumably only for the
 *     remaining message types; that branch header is not visible here). */
1205 pinctrl_recv(const struct ofp_header
*oh
, enum ofptype type
,
1206 struct controller_ctx
*ctx
)
1208 if (type
== OFPTYPE_ECHO_REQUEST
) {
1209 queue_msg(ofputil_encode_echo_reply(oh
));
1210 } else if (type
== OFPTYPE_GET_CONFIG_REPLY
) {
1211 /* Enable asynchronous messages */
1212 struct ofputil_switch_config config
;
1214 ofputil_decode_get_config_reply(oh
, &config
);
1215 config
.miss_send_len
= UINT16_MAX
;
1216 set_switch_config(swconn
, &config
);
1217 } else if (type
== OFPTYPE_PACKET_IN
) {
1218 process_packet_in(oh
, ctx
);
1220 if (VLOG_IS_DBG_ENABLED()) {
1221 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
1223 char *s
= ofp_to_string(oh
, ntohs(oh
->length
), NULL
, NULL
, 2);
1225 VLOG_DBG_RL(&rl
, "OpenFlow packet ignored: %s", s
);
/* Periodic entry point of the pinctrl module.
 *
 * Builds the management-socket target "unix:<rundir>/<bridge>.mgmt" from
 * 'br_int' and reconnects 'swconn' when it differs from the current target.
 * When the connection sequence number has changed since the last call, it is
 * recorded in 'conn_seq_no' and the buffered MAC bindings are flushed.  At
 * most 50 received messages are handled per call; afterwards the buffered
 * MAC bindings, the GARP sender and the periodic IPv6 RA sender are run. */
1232 pinctrl_run(struct controller_ctx
*ctx
,
1233 const struct ovsrec_bridge
*br_int
,
1234 const struct sbrec_chassis
*chassis
,
1235 const struct chassis_index
*chassis_index
,
1236 struct hmap
*local_datapaths
,
1237 struct sset
*active_tunnels
)
1239 char *target
= xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int
->name
);
1240 if (strcmp(target
, rconn_get_target(swconn
))) {
1241 VLOG_INFO("%s: connecting to switch", target
);
1242 rconn_connect(swconn
, target
, target
);
1248 if (!rconn_is_connected(swconn
)) {
1252 if (conn_seq_no
!= rconn_get_connection_seqno(swconn
)) {
1254 conn_seq_no
= rconn_get_connection_seqno(swconn
);
1255 flush_put_mac_bindings();
1258 /* Process a limited number of messages per call. */
1259 for (int i
= 0; i
< 50; i
++) {
1260 struct ofpbuf
*msg
= rconn_recv(swconn
);
1265 const struct ofp_header
*oh
= msg
->data
;
/* NOTE(review): the return value of ofptype_decode() is not checked in the
 * visible code -- confirm 'type' is always set before it is used below. */
1268 ofptype_decode(&type
, oh
);
1269 pinctrl_recv(oh
, type
, ctx
);
1273 run_put_mac_bindings(ctx
);
1274 send_garp_run(ctx
, br_int
, chassis
, chassis_index
, local_datapaths
,
1276 send_ipv6_ras(ctx
, local_datapaths
);
/* State used for periodic IPv6 router advertisements (RAs). */
1279 /* Table of ipv6_ra_state structures, keyed on logical port name */
1280 static struct shash ipv6_ras
;
1282 /* Time at which the next IPv6 RA is due, in milliseconds on the time_msec()
 * clock (it is assigned from ipv6_ra_send() results and handed to
 * poll_timer_wait_until()); LLONG_MAX when nothing is scheduled. */
1283 static long long int send_ipv6_ra_time
;
/* Per-port RA parameters, rebuilt from the port binding options. */
1285 struct ipv6_ra_config
{
1286 time_t min_interval
;
1287 time_t max_interval
;
1288 struct eth_addr eth_src
;
1289 struct eth_addr eth_dst
;
1290 struct in6_addr ipv6_src
;
1291 struct in6_addr ipv6_dst
;
1293 uint8_t mo_flags
; /* Managed/Other flags for RAs */
1294 uint8_t la_flags
; /* On-link/autonomous flags for address prefixes */
1295 struct lport_addresses prefixes
;
/* Per-port RA runtime state stored in 'ipv6_ras'. */
1298 struct ipv6_ra_state
{
1299 long long int next_announce
;
1300 struct ipv6_ra_config
*config
;
/* Initialization of the RA state: empty table, no RA scheduled.  The header
 * of the enclosing function is not visible in this listing. */
1309 shash_init(&ipv6_ras
);
1310 send_ipv6_ra_time
= LLONG_MAX
;
/* Releases the resources held by 'config'.  The visible part destroys the
 * 'prefixes' address list; the remaining lines of the function are not
 * visible in this listing. */
1314 ipv6_ra_config_delete(struct ipv6_ra_config
*config
)
1317 destroy_lport_addresses(&config
->prefixes
);
/* Releases the RA state 'ra'.  The visible part hands 'ra->config' to
 * ipv6_ra_config_delete(); the remaining lines of the function are not
 * visible in this listing. */
1323 ipv6_ra_delete(struct ipv6_ra_state
*ra
)
1326 ipv6_ra_config_delete(ra
->config
);
/* Tears down the 'ipv6_ras' table: every node is removed with shash_delete()
 * while iterating safely, then the table itself is destroyed.  (The line
 * between fetching 'ra' and deleting the node is not visible in this
 * listing; presumably it releases 'ra'.) */
1332 destroy_ipv6_ras(void)
1334 struct shash_node
*iter
, *next
;
1335 SHASH_FOR_EACH_SAFE (iter
, next
, &ipv6_ras
) {
1336 struct ipv6_ra_state
*ra
= iter
->data
;
1338 shash_delete(&ipv6_ras
, iter
);
1340 shash_destroy(&ipv6_ras
);
1343 static struct ipv6_ra_config
*
1344 ipv6_ra_update_config(const struct sbrec_port_binding
*pb
)
1346 struct ipv6_ra_config
*config
;
1348 config
= xzalloc(sizeof *config
);
1350 config
->max_interval
= smap_get_int(&pb
->options
, "ipv6_ra_max_interval",
1351 ND_RA_MAX_INTERVAL_DEFAULT
);
1352 config
->min_interval
= smap_get_int(&pb
->options
, "ipv6_ra_min_interval",
1353 nd_ra_min_interval_default(config
->max_interval
));
1354 config
->mtu
= smap_get_int(&pb
->options
, "ipv6_ra_mtu", ND_MTU_DEFAULT
);
1355 config
->la_flags
= ND_PREFIX_ON_LINK
;
1357 const char *address_mode
= smap_get(&pb
->options
, "ipv6_ra_address_mode");
1358 if (!address_mode
) {
1359 VLOG_WARN("No address mode specified");
1362 if (!strcmp(address_mode
, "dhcpv6_stateless")) {
1363 config
->mo_flags
= IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG
;
1364 } else if (!strcmp(address_mode
, "dhcpv6_stateful")) {
1365 config
->mo_flags
= IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG
;
1366 } else if (!strcmp(address_mode
, "slaac")) {
1367 config
->la_flags
|= ND_PREFIX_AUTONOMOUS_ADDRESS
;
1369 VLOG_WARN("Invalid address mode %s", address_mode
);
1373 const char *prefixes
= smap_get(&pb
->options
, "ipv6_ra_prefixes");
1374 if (prefixes
&& !extract_ip_addresses(prefixes
, &config
->prefixes
)) {
1375 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes
);
1379 /* All nodes multicast addresses */
1380 config
->eth_dst
= (struct eth_addr
) ETH_ADDR_C(33,33,00,00,00,01);
1381 ipv6_parse("ff02::1", &config
->ipv6_dst
);
1383 const char *eth_addr
= smap_get(&pb
->options
, "ipv6_ra_src_eth");
1384 if (!eth_addr
|| !eth_addr_from_string(eth_addr
, &config
->eth_src
)) {
1385 VLOG_WARN("Invalid ethernet source %s", eth_addr
);
1388 const char *ip_addr
= smap_get(&pb
->options
, "ipv6_ra_src_addr");
1389 if (!ip_addr
|| !ipv6_parse(ip_addr
, &config
->ipv6_src
)) {
1390 VLOG_WARN("Invalid IP source %s", ip_addr
);
1397 ipv6_ra_config_delete(config
);
/* Returns the time, in milliseconds on the time_msec() clock, at which the
 * next periodic router advertisement should be sent: the current time plus a
 * delay of at least 'min_interval' seconds and less than 'max_interval'
 * seconds.
 *
 * random_range() takes an unsigned bound and reduces a random number modulo
 * that bound, so it must never be given zero, and a negative span would wrap
 * around to a huge delay.  Neither interval is validated where the
 * configuration is read, so when 'max_interval' does not exceed
 * 'min_interval' the random component is skipped and the delay is exactly
 * 'min_interval'. */
static long long int
ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
{
    long long int min_interval_ms = min_interval * 1000LL;
    long long int max_interval_ms = max_interval * 1000LL;
    long long int next_announce = time_msec() + min_interval_ms;

    if (max_interval_ms > min_interval_ms) {
        next_announce += random_range(max_interval_ms - min_interval_ms);
    }

    return next_announce;
}
/* Appends to 'ofpacts' a set-field action on field 'dst' that loads the low
 * 'n_bits' bits of 'value' at bit offset 'ofs'.
 *
 * 'value' is converted to network byte order, the selected bits are copied
 * into the action's value with bitwise_copy(), and the same bit range is set
 * to ones in the action's mask with bitwise_one(). */
1412 put_load(uint64_t value
, enum mf_field_id dst
, int ofs
, int n_bits
,
1413 struct ofpbuf
*ofpacts
)
1415 struct ofpact_set_field
*sf
= ofpact_put_set_field(ofpacts
,
1416 mf_from_id(dst
), NULL
,
1418 ovs_be64 n_value
= htonll(value
);
1419 bitwise_copy(&n_value
, 8, 0, sf
->value
, sf
->field
->n_bytes
, ofs
, n_bits
);
1420 bitwise_one(ofpact_set_field_mask(sf
), sf
->field
->n_bytes
, ofs
, n_bits
);
/* Sends one periodic router advertisement for 'ra' if it is due and returns
 * the time of the next announcement (milliseconds, time_msec() clock).
 *
 * If 'ra->next_announce' has not been reached yet it is returned unchanged.
 * Otherwise an RA is composed from the addresses, M/O flags and MTU in
 * 'ra->config', one prefix option is appended per configured IPv6 prefix,
 * and the packet is queued as a packet-out whose actions load the datapath
 * key, the input port key and the MLF_LOCAL_ONLY_BIT flag and then resubmit
 * to OFTABLE_LOG_INGRESS_PIPELINE.  'ra->next_announce' is then recomputed
 * from the configured min/max intervals. */
1423 static long long int
1424 ipv6_ra_send(struct ipv6_ra_state
*ra
)
1426 if (time_msec() < ra
->next_announce
) {
1427 return ra
->next_announce
;
1430 uint64_t packet_stub
[128 / 8];
1431 struct dp_packet packet
;
1432 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
1433 compose_nd_ra(&packet
, ra
->config
->eth_src
, ra
->config
->eth_dst
,
1434 &ra
->config
->ipv6_src
, &ra
->config
->ipv6_dst
,
1435 255, ra
->config
->mo_flags
, 0, 0, 0, ra
->config
->mtu
);
1437 for (int i
= 0; i
< ra
->config
->prefixes
.n_ipv6_addrs
; i
++) {
1439 memcpy(&addr
, &ra
->config
->prefixes
.ipv6_addrs
[i
].addr
, sizeof addr
);
1440 packet_put_ra_prefix_opt(&packet
,
1441 ra
->config
->prefixes
.ipv6_addrs
[i
].plen
,
1442 ra
->config
->la_flags
, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME
),
1443 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME
), addr
);
1446 uint64_t ofpacts_stub
[4096 / 8];
1447 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
1449 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1450 uint32_t dp_key
= ra
->metadata
;
1451 uint32_t port_key
= ra
->port_key
;
1452 put_load(dp_key
, MFF_LOG_DATAPATH
, 0, 64, &ofpacts
);
1453 put_load(port_key
, MFF_LOG_INPORT
, 0, 32, &ofpacts
);
1454 put_load(1, MFF_LOG_FLAGS
, MLF_LOCAL_ONLY_BIT
, 1, &ofpacts
);
1455 struct ofpact_resubmit
*resubmit
= ofpact_put_RESUBMIT(&ofpacts
);
1456 resubmit
->in_port
= OFPP_CONTROLLER
;
1457 resubmit
->table_id
= OFTABLE_LOG_INGRESS_PIPELINE
;
1459 struct ofputil_packet_out po
= {
1460 .packet
= dp_packet_data(&packet
),
1461 .packet_len
= dp_packet_size(&packet
),
1462 .buffer_id
= UINT32_MAX
,
1463 .ofpacts
= ofpacts
.data
,
1464 .ofpacts_len
= ofpacts
.size
,
1467 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
1468 enum ofp_version version
= rconn_get_version(swconn
);
1469 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
1470 queue_msg(ofputil_encode_packet_out(&po
, proto
));
1471 dp_packet_uninit(&packet
);
1472 ofpbuf_uninit(&ofpacts
);
1474 ra
->next_announce
= ipv6_ra_calc_next_announce(ra
->config
->min_interval
,
1475 ra
->config
->max_interval
);
1477 return ra
->next_announce
;
/* Arms the poll loop to wake up at 'send_ipv6_ra_time', the time at which the
 * next periodic RA is due.  The header of the enclosing function is not
 * visible in this listing (presumably the RA wait helper). */
1483 poll_timer_wait_until(send_ipv6_ra_time
);
/* Sends the periodic IPv6 router advertisements that are due and recomputes
 * 'send_ipv6_ra_time' as the earliest next announcement.
 *
 * Every state in 'ipv6_ras' is first marked with 'delete_me'.  Then, for each
 * local datapath, the port bindings of that datapath are walked through the
 * "lport-by-datapath" index: bindings with option "ipv6_ra_send_periodic"
 * enabled have their "peer" port binding looked up by name and a fresh
 * configuration built with ipv6_ra_update_config().  The state stored under
 * the binding's logical port name is created (with a first announce time) or
 * has its configuration replaced, takes its port and datapath keys from the
 * peer, is unmarked, and is passed to ipv6_ra_send().  States still marked at
 * the end are removed from the table. */
1487 send_ipv6_ras(const struct controller_ctx
*ctx
, struct hmap
*local_datapaths
)
1489 struct shash_node
*iter
, *iter_next
;
1491 send_ipv6_ra_time
= LLONG_MAX
;
1493 SHASH_FOR_EACH (iter
, &ipv6_ras
) {
1494 struct ipv6_ra_state
*ra
= iter
->data
;
1495 ra
->delete_me
= true;
1498 const struct local_datapath
*ld
;
1499 HMAP_FOR_EACH (ld
, hmap_node
, local_datapaths
) {
1500 struct sbrec_port_binding
*lpval
;
1501 const struct sbrec_port_binding
*pb
;
1502 struct ovsdb_idl_index_cursor cursor
;
1504 lpval
= sbrec_port_binding_index_init_row(ctx
->ovnsb_idl
,
1505 &sbrec_table_port_binding
);
1506 sbrec_port_binding_index_set_datapath(lpval
, ld
->datapath
);
1507 ovsdb_idl_initialize_cursor(ctx
->ovnsb_idl
, &sbrec_table_port_binding
,
1508 "lport-by-datapath", &cursor
);
1509 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, &cursor
, lpval
) {
1510 if (!smap_get_bool(&pb
->options
, "ipv6_ra_send_periodic", false)) {
1514 const char *peer_s
= smap_get(&pb
->options
, "peer");
1519 const struct sbrec_port_binding
*peer
1520 = lport_lookup_by_name(ctx
->ovnsb_idl
, peer_s
);
1525 struct ipv6_ra_config
*config
= ipv6_ra_update_config(pb
);
1530 struct ipv6_ra_state
*ra
1531 = shash_find_data(&ipv6_ras
, pb
->logical_port
);
1533 ra
= xzalloc(sizeof *ra
);
1534 ra
->config
= config
;
1535 ra
->next_announce
= ipv6_ra_calc_next_announce(
1536 ra
->config
->min_interval
,
1537 ra
->config
->max_interval
);
1538 shash_add(&ipv6_ras
, pb
->logical_port
, ra
);
1540 ipv6_ra_config_delete(ra
->config
);
1541 ra
->config
= config
;
1544 /* Peer is the logical switch port that the logical
1545 * router port is connected to. The RA is injected
1546 * into that logical switch port.
1548 ra
->port_key
= peer
->tunnel_key
;
1549 ra
->metadata
= peer
->datapath
->tunnel_key
;
1550 ra
->delete_me
= false;
1552 long long int next_ra
= ipv6_ra_send(ra
);
1553 if (send_ipv6_ra_time
> next_ra
) {
1554 send_ipv6_ra_time
= next_ra
;
1557 sbrec_port_binding_index_destroy_row(lpval
);
1560 /* Remove those that are no longer in the SB database */
1561 SHASH_FOR_EACH_SAFE (iter
, iter_next
, &ipv6_ras
) {
1562 struct ipv6_ra_state
*ra
= iter
->data
;
1563 if (ra
->delete_me
) {
1564 shash_delete(&ipv6_ras
, iter
);
/* Registers the wake-up conditions of the pinctrl module with the poll loop.
 * Visible here: pending MAC bindings, and 'swconn' needing to run or having
 * data to receive.  Further wait calls may exist on lines not visible in
 * this listing. */
1571 pinctrl_wait(struct controller_ctx
*ctx
)
1573 wait_put_mac_bindings(ctx
);
1574 rconn_run_wait(swconn
);
1575 rconn_recv_wait(swconn
);
/* Releases the pinctrl module's resources.  Visible here: the switch
 * connection, the buffered MAC bindings and the GARP state.  Further cleanup
 * may exist on lines not visible in this listing. */
1581 pinctrl_destroy(void)
1583 rconn_destroy(swconn
);
1584 destroy_put_mac_bindings();
1585 destroy_send_garps();
/* Buffered MAC-binding requests: record type and the table that holds them. */
1589 /* Implementation of the "put_arp" and "put_nd" OVN actions. These
1590 * actions send a packet to ovn-controller, using the flow as an API
1591 * (see actions.h for details). This code implements the actions by
1592 * updating the MAC_Binding table in the southbound database.
1594 * This code could be a lot simpler if the database could always be updated,
1595 * but in fact we can only update it when ctx->ovnsb_idl_txn is nonnull. Thus,
1596 * we buffer up a few put_mac_bindings (but we don't keep them longer
1597 * than 1 second) and apply them whenever a database transaction is
1600 /* Buffered "put_mac_binding" operation. */
1601 struct put_mac_binding
{
1602 struct hmap_node hmap_node
; /* In 'put_mac_bindings'. */
1604 long long int timestamp
; /* In milliseconds. */
1609 char ip_s
[INET6_ADDRSTRLEN
+ 1];
1612 struct eth_addr mac
;
1615 /* Contains "struct put_mac_binding"s. */
1616 static struct hmap put_mac_bindings
;
/* Initializes the 'put_mac_bindings' table as an empty hmap. */
1619 init_put_mac_bindings(void)
1621 hmap_init(&put_mac_bindings
);
/* Discards every buffered MAC binding with flush_put_mac_bindings() and then
 * destroys the 'put_mac_bindings' table. */
1625 destroy_put_mac_bindings(void)
1627 flush_put_mac_bindings();
1628 hmap_destroy(&put_mac_bindings
);
/* Searches the 'put_mac_bindings' bucket selected by 'hash' for an entry
 * whose datapath key, port key and IP string all equal 'dp_key', 'port_key'
 * and 'ip_s'.  (The return statements are not visible in this listing;
 * presumably the matching entry, or NULL, is returned.) */
1631 static struct put_mac_binding
*
1632 pinctrl_find_put_mac_binding(uint32_t dp_key
, uint32_t port_key
,
1633 const char *ip_s
, uint32_t hash
)
1635 struct put_mac_binding
*pa
;
1636 HMAP_FOR_EACH_WITH_HASH (pa
, hmap_node
, hash
, &put_mac_bindings
) {
1637 if (pa
->dp_key
== dp_key
1638 && pa
->port_key
== port_key
1639 && !strcmp(pa
->ip_s
, ip_s
)) {
/* Buffers a MAC binding learned from a "put_arp" or "put_nd" packet-in.
 *
 * The datapath key is taken from 'md->metadata' and the port key from the
 * MFF_LOG_INPORT register.  The IP address is formatted either from register
 * 0 as IPv4 or from xxreg0 as IPv6 (presumably selected by 'is_arp'; the
 * branch header is not visible in this listing).  The entry is looked up by
 * (datapath key, port key, IP string); the allocation path tests whether
 * 1000 entries are already buffered and bumps the
 * pinctrl_drop_put_mac_binding coverage counter in that case.  The entry's
 * timestamp is set to the current time and its MAC to 'headers->dl_src'. */
1647 pinctrl_handle_put_mac_binding(const struct flow
*md
,
1648 const struct flow
*headers
, bool is_arp
)
1650 uint32_t dp_key
= ntohll(md
->metadata
);
1651 uint32_t port_key
= md
->regs
[MFF_LOG_INPORT
- MFF_REG0
];
1652 char ip_s
[INET6_ADDRSTRLEN
];
1655 ovs_be32 ip
= htonl(md
->regs
[0]);
1656 inet_ntop(AF_INET
, &ip
, ip_s
, sizeof(ip_s
));
1658 ovs_be128 ip6
= hton128(flow_get_xxreg(md
, 0));
1659 inet_ntop(AF_INET6
, &ip6
, ip_s
, sizeof(ip_s
));
1661 uint32_t hash
= hash_string(ip_s
, hash_2words(dp_key
, port_key
));
1662 struct put_mac_binding
*pmb
1663 = pinctrl_find_put_mac_binding(dp_key
, port_key
, ip_s
, hash
);
1665 if (hmap_count(&put_mac_bindings
) >= 1000) {
1666 COVERAGE_INC(pinctrl_drop_put_mac_binding
);
1670 pmb
= xmalloc(sizeof *pmb
);
1671 hmap_insert(&put_mac_bindings
, &pmb
->hmap_node
, hash
);
1672 pmb
->dp_key
= dp_key
;
1673 pmb
->port_key
= port_key
;
1674 ovs_strlcpy_arrays(pmb
->ip_s
, ip_s
);
1676 pmb
->timestamp
= time_msec();
1677 pmb
->mac
= headers
->dl_src
;
/* Applies the buffered binding 'pmb' to the southbound MAC_Binding table.
 *
 * The entry's age is first compared against 1000 ms (presumably stale
 * entries are skipped; the branch body is not visible in this listing).  The
 * port binding is resolved from the entry's datapath and port keys, with a
 * rate-limited warning when it is unknown.  If a MAC_Binding row with the
 * same logical port and IP exists, its MAC is updated when it differs;
 * otherwise a new row is inserted in the current transaction with the
 * logical port, IP, MAC and datapath filled in. */
1681 run_put_mac_binding(struct controller_ctx
*ctx
,
1682 const struct put_mac_binding
*pmb
)
1684 if (time_msec() > pmb
->timestamp
+ 1000) {
1688 /* Convert logical datapath and logical port key into lport. */
1689 const struct sbrec_port_binding
*pb
1690 = lport_lookup_by_key(ctx
->ovnsb_idl
, pmb
->dp_key
, pmb
->port_key
);
1692 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
1694 VLOG_WARN_RL(&rl
, "unknown logical port with datapath %"PRIu32
" "
1695 "and port %"PRIu32
, pmb
->dp_key
, pmb
->port_key
);
1699 /* Convert ethernet argument to string form for database. */
1700 char mac_string
[ETH_ADDR_STRLEN
+ 1];
1701 snprintf(mac_string
, sizeof mac_string
,
1702 ETH_ADDR_FMT
, ETH_ADDR_ARGS(pmb
->mac
));
1704 /* Check for an update an existing IP-MAC binding for this logical
1707 * XXX This is not very efficient. */
1708 const struct sbrec_mac_binding
*b
;
1709 SBREC_MAC_BINDING_FOR_EACH (b
, ctx
->ovnsb_idl
) {
1710 if (!strcmp(b
->logical_port
, pb
->logical_port
)
1711 && !strcmp(b
->ip
, pmb
->ip_s
)) {
1712 if (strcmp(b
->mac
, mac_string
)) {
1713 sbrec_mac_binding_set_mac(b
, mac_string
);
1719 /* Add new IP-MAC binding for this logical port. */
1720 b
= sbrec_mac_binding_insert(ctx
->ovnsb_idl_txn
);
1721 sbrec_mac_binding_set_logical_port(b
, pb
->logical_port
);
1722 sbrec_mac_binding_set_ip(b
, pmb
->ip_s
);
1723 sbrec_mac_binding_set_mac(b
, mac_string
);
1724 sbrec_mac_binding_set_datapath(b
, pb
->datapath
);
/* Applies every buffered MAC binding with run_put_mac_binding() and then
 * empties the buffer.  The availability of 'ctx->ovnsb_idl_txn' is tested
 * first (presumably nothing is done without a transaction; the branch body
 * is not visible in this listing). */
1728 run_put_mac_bindings(struct controller_ctx
*ctx
)
1730 if (!ctx
->ovnsb_idl_txn
) {
1734 const struct put_mac_binding
*pmb
;
1735 HMAP_FOR_EACH (pmb
, hmap_node
, &put_mac_bindings
) {
1736 run_put_mac_binding(ctx
, pmb
);
1738 flush_put_mac_bindings();
/* Requests an immediate poll-loop wake-up when a southbound transaction is
 * available and at least one MAC binding is buffered. */
1742 wait_put_mac_bindings(struct controller_ctx
*ctx
)
1744 if (ctx
->ovnsb_idl_txn
&& !hmap_is_empty(&put_mac_bindings
)) {
1745 poll_immediate_wake();
/* Pops every entry out of 'put_mac_bindings', leaving the table empty.  (The
 * loop body is not visible in this listing; presumably each popped entry is
 * freed.) */
1750 flush_put_mac_bindings(void)
1752 struct put_mac_binding
*pmb
;
1753 HMAP_FOR_EACH_POP (pmb
, hmap_node
, &put_mac_bindings
) {
/* Gratuitous ARP (GARP) bookkeeping.  The fields below describe one pending
 * announcement; the opening of the comment and the header of the enclosing
 * struct are not visible in this listing.  Records are kept by name in
 * 'send_garp_data', and 'send_garp_time' holds the time of the next
 * announcement. */
1759 * Send gratuitous ARP for vif on localnet.
1761 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
1762 * the port's mac,ip mapping. On localnet, such announcements are needed for
1763 * switches and routers on the broadcast segment to update their port-mac
1767 struct eth_addr ea
; /* Ethernet address of port. */
1768 ovs_be32 ipv4
; /* Ipv4 address of port. */
1769 long long int announce_time
; /* Next announcement in ms. */
1770 int backoff
; /* Backoff for the next announcement. */
1771 ofp_port_t ofport
; /* ofport used to output this GARP. */
1772 int tag
; /* VLAN tag of this GARP packet, or -1. */
1775 /* Contains GARPs to be sent. */
1776 static struct shash send_garp_data
;
1778 /* Next GARP announcement in ms. */
1779 static long long int send_garp_time
;
/* Initializes the GARP state: an empty 'send_garp_data' table and no
 * announcement scheduled ('send_garp_time' set to LLONG_MAX). */
1782 init_send_garps(void)
1784 shash_init(&send_garp_data
);
1785 send_garp_time
= LLONG_MAX
;
/* Destroys the 'send_garp_data' table together with the records stored in
 * it, using shash_destroy_free_data(). */
1789 destroy_send_garps(void)
1791 shash_destroy_free_data(&send_garp_data
);
/* Allocates a GARP record and registers it in 'send_garp_data' under 'name'.
 * Visible here: the first announcement is scheduled 1000 ms from now and the
 * output port is set to 'ofport'.  The assignments that store 'ea', 'ip' and
 * 'tag' are not visible in this listing. */
1795 add_garp(const char *name
, ofp_port_t ofport
, int tag
,
1796 const struct eth_addr ea
, ovs_be32 ip
)
1798 struct garp_data
*garp
= xmalloc(sizeof *garp
);
1801 garp
->announce_time
= time_msec() + 1000;
1803 garp
->ofport
= ofport
;
1805 shash_add(&send_garp_data
, name
, garp
);
1808 /* Add or update a vif for which GARPs need to be announced.
 *
 * The localnet port of the datapath of 'binding_rec' is looked up in
 * 'local_datapaths'; its ofport comes from 'localnet_ofports' and its VLAN
 * tag from the localnet port binding (-1 when it has none).
 *
 * For bindings of type "l3gateway" or "patch", every address set stored in
 * 'nat_addresses' under the logical port name is removed from that table and
 * one GARP record per IPv4 address, named "<logical_port>-<address>", is
 * updated or created.  For other bindings the record named after the
 * logical port is updated if present; otherwise the binding's MAC strings
 * are parsed and a record is added from the first IPv4 address of a parsed
 * entry.
 *
 * NOTE(review): in the last loop, when extract_lsp_addresses() succeeds but
 * yields no IPv4 address, 'laddrs' does not appear to reach
 * destroy_lport_addresses() -- the skip statement is not visible in this
 * listing, so confirm whether this leaks. */
1810 send_garp_update(const struct sbrec_port_binding
*binding_rec
,
1811 struct simap
*localnet_ofports
, struct hmap
*local_datapaths
,
1812 struct shash
*nat_addresses
)
1814 /* Find the localnet ofport to send this GARP. */
1815 struct local_datapath
*ld
1816 = get_local_datapath(local_datapaths
,
1817 binding_rec
->datapath
->tunnel_key
);
1818 if (!ld
|| !ld
->localnet_port
) {
1821 ofp_port_t ofport
= u16_to_ofp(simap_get(localnet_ofports
,
1822 ld
->localnet_port
->logical_port
));
1823 int tag
= ld
->localnet_port
->n_tag
? *ld
->localnet_port
->tag
: -1;
1825 volatile struct garp_data
*garp
= NULL
;
1826 /* Update GARP for NAT IP if it exists. Consider port bindings with type
1827 * "l3gateway" for logical switch ports attached to gateway routers, and
1828 * port bindings with type "patch" for logical switch ports attached to
1829 * distributed gateway ports. */
1830 if (!strcmp(binding_rec
->type
, "l3gateway")
1831 || !strcmp(binding_rec
->type
, "patch")) {
1832 struct lport_addresses
*laddrs
= NULL
;
1833 while ((laddrs
= shash_find_and_delete(nat_addresses
,
1834 binding_rec
->logical_port
))) {
1836 for (i
= 0; i
< laddrs
->n_ipv4_addrs
; i
++) {
1837 char *name
= xasprintf("%s-%s", binding_rec
->logical_port
,
1838 laddrs
->ipv4_addrs
[i
].addr_s
);
1839 garp
= shash_find_data(&send_garp_data
, name
);
1841 garp
->ofport
= ofport
;
1844 add_garp(name
, ofport
, tag
, laddrs
->ea
,
1845 laddrs
->ipv4_addrs
[i
].addr
);
1849 destroy_lport_addresses(laddrs
);
1855 /* Update GARP for vif if it exists. */
1856 garp
= shash_find_data(&send_garp_data
, binding_rec
->logical_port
);
1858 garp
->ofport
= ofport
;
1862 /* Add GARP for new vif. */
1864 for (i
= 0; i
< binding_rec
->n_mac
; i
++) {
1865 struct lport_addresses laddrs
;
1866 if (!extract_lsp_addresses(binding_rec
->mac
[i
], &laddrs
)
1867 || !laddrs
.n_ipv4_addrs
) {
1871 add_garp(binding_rec
->logical_port
, ofport
, tag
,
1872 laddrs
.ea
, laddrs
.ipv4_addrs
[0].addr
);
1874 destroy_lport_addresses(&laddrs
);
1879 /* Remove a vif from GARP announcements: the record stored under 'lport'
 * is taken out of 'send_garp_data'.  (What happens to the removed record is
 * not visible in this listing; presumably it is freed.) */
1881 send_garp_delete(const char *lport
)
1883 struct garp_data
*garp
= shash_find_and_delete(&send_garp_data
, lport
);
/* Sends a gratuitous ARP for 'garp' if it is due at 'current_time' and
 * returns the time of its next announcement, in milliseconds.
 *
 * When the announce time has not been reached it is returned unchanged.
 * Otherwise an ARP request is composed whose sender and target IP are both
 * 'garp->ipv4', a VLAN header is pushed when 'garp->tag' is non-negative,
 * and the packet is queued as a packet-out with a single output action to
 * 'garp->ofport'.  While 'garp->backoff' is below 16 the next announcement
 * is scheduled 'backoff' seconds after 'current_time'; the other visible
 * assignment sets the announce time to LLONG_MAX, i.e. no further
 * announcement. */
1887 static long long int
1888 send_garp(struct garp_data
*garp
, long long int current_time
)
1890 if (current_time
< garp
->announce_time
) {
1891 return garp
->announce_time
;
1894 /* Compose a GARP request packet. */
1895 uint64_t packet_stub
[128 / 8];
1896 struct dp_packet packet
;
1897 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
1898 compose_arp(&packet
, ARP_OP_REQUEST
, garp
->ea
, eth_addr_zero
,
1899 true, garp
->ipv4
, garp
->ipv4
);
1901 /* Compose a GARP request packet's vlan if exist. */
1902 if (garp
->tag
>= 0) {
1903 eth_push_vlan(&packet
, htons(ETH_TYPE_VLAN
), htons(garp
->tag
));
1906 /* Compose actions. The garp request is output on localnet ofport. */
1907 uint64_t ofpacts_stub
[4096 / 8];
1908 struct ofpbuf ofpacts
= OFPBUF_STUB_INITIALIZER(ofpacts_stub
);
1909 enum ofp_version version
= rconn_get_version(swconn
);
1910 ofpact_put_OUTPUT(&ofpacts
)->port
= garp
->ofport
;
1912 struct ofputil_packet_out po
= {
1913 .packet
= dp_packet_data(&packet
),
1914 .packet_len
= dp_packet_size(&packet
),
1915 .buffer_id
= UINT32_MAX
,
1916 .ofpacts
= ofpacts
.data
,
1917 .ofpacts_len
= ofpacts
.size
,
1919 match_set_in_port(&po
.flow_metadata
, OFPP_CONTROLLER
);
1920 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
1921 queue_msg(ofputil_encode_packet_out(&po
, proto
));
1922 dp_packet_uninit(&packet
);
1923 ofpbuf_uninit(&ofpacts
);
1925 /* Set the next announcement. At most 5 announcements are sent for a
1927 if (garp
->backoff
< 16) {
1929 garp
->announce_time
= current_time
+ garp
->backoff
* 1000;
1931 garp
->announce_time
= LLONG_MAX
;
1933 return garp
->announce_time
;
1936 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports.
 *
 * First pass, over the ports of 'br_int': the port named like the bridge
 * itself and ports whose chassis id equals 'chassis->name' are tested for
 * (presumably skipped; the branch bodies are not visible in this listing).
 * For each interface with an ofport, a valid ofport (1..OFPP_MAX) is
 * recorded in 'localnet_ofports' under the "ovn-localnet-port" name, and an
 * interface id whose port binding sits on a local datapath with a localnet
 * port is added to 'localnet_vifs'.
 *
 * Second pass, over 'local_datapaths' that have a localnet port: the port
 * bindings of each datapath are walked through the "lport-by-datapath"
 * index and the logical port names of "l3gateway" bindings (only when the
 * datapath has a local l3gateway) and of "patch" bindings are added to
 * 'local_l3gw_ports'. */
1938 get_localnet_vifs_l3gwports(struct controller_ctx
*ctx
,
1939 const struct ovsrec_bridge
*br_int
,
1940 const struct sbrec_chassis
*chassis
,
1941 struct hmap
*local_datapaths
,
1942 struct sset
*localnet_vifs
,
1943 struct simap
*localnet_ofports
,
1944 struct sset
*local_l3gw_ports
)
1946 for (int i
= 0; i
< br_int
->n_ports
; i
++) {
1947 const struct ovsrec_port
*port_rec
= br_int
->ports
[i
];
1948 if (!strcmp(port_rec
->name
, br_int
->name
)) {
1951 const char *chassis_id
= smap_get(&port_rec
->external_ids
,
1953 if (chassis_id
&& !strcmp(chassis_id
, chassis
->name
)) {
1956 const char *localnet
= smap_get(&port_rec
->external_ids
,
1957 "ovn-localnet-port");
1958 for (int j
= 0; j
< port_rec
->n_interfaces
; j
++) {
1959 const struct ovsrec_interface
*iface_rec
= port_rec
->interfaces
[j
];
1960 if (!iface_rec
->n_ofport
) {
1963 /* Get localnet port with its ofport. */
1965 int64_t ofport
= iface_rec
->ofport
[0];
1966 if (ofport
< 1 || ofport
> ofp_to_u16(OFPP_MAX
)) {
1969 simap_put(localnet_ofports
, localnet
, ofport
);
1972 /* Get localnet vif. */
1973 const char *iface_id
= smap_get(&iface_rec
->external_ids
,
1978 const struct sbrec_port_binding
*pb
1979 = lport_lookup_by_name(ctx
->ovnsb_idl
, iface_id
);
1983 struct local_datapath
*ld
1984 = get_local_datapath(local_datapaths
,
1985 pb
->datapath
->tunnel_key
);
1986 if (ld
&& ld
->localnet_port
) {
1987 sset_add(localnet_vifs
, iface_id
);
1992 const struct local_datapath
*ld
;
1993 struct ovsdb_idl_index_cursor cursor
;
1994 struct sbrec_port_binding
*lpval
;
1995 lpval
= sbrec_port_binding_index_init_row(ctx
->ovnsb_idl
,
1996 &sbrec_table_port_binding
);
1997 ovsdb_idl_initialize_cursor(ctx
->ovnsb_idl
, &sbrec_table_port_binding
,
1998 "lport-by-datapath", &cursor
);
1999 HMAP_FOR_EACH (ld
, hmap_node
, local_datapaths
) {
2000 const struct sbrec_port_binding
*pb
;
2002 if (!ld
->localnet_port
) {
2006 /* Get l3gw ports. Consider port bindings with type "l3gateway"
2007 * that connect to gateway routers (if local), and consider port
2008 * bindings of type "patch" since they might connect to
2009 * distributed gateway ports with NAT addresses. */
2011 sbrec_port_binding_index_set_datapath(lpval
, ld
->datapath
);
2013 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb
, &cursor
, lpval
) {
2014 if ((ld
->has_local_l3gateway
&& !strcmp(pb
->type
, "l3gateway"))
2015 || !strcmp(pb
->type
, "patch")) {
2016 sset_add(local_l3gw_ports
, pb
->logical_port
);
2020 sbrec_port_binding_index_destroy_row(lpval
);
/* Determines whether the logical port 'port_name' resides on 'chassis'.
 *
 * The port binding is looked up by name; a missing binding or one without a
 * chassis is tested for first (the result returned in that case is not
 * visible in this listing).  For any type other than "chassisredirect" the
 * answer is whether the binding's chassis is 'chassis'.  For
 * "chassisredirect" ports the ordered gateway chassis list is obtained,
 * gateway_chassis_is_active() is evaluated on it into 'active', and the list
 * is destroyed again. */
2024 pinctrl_is_chassis_resident(struct controller_ctx
*ctx
,
2025 const struct sbrec_chassis
*chassis
,
2026 const struct chassis_index
*chassis_index
,
2027 struct sset
*active_tunnels
,
2028 const char *port_name
)
2030 const struct sbrec_port_binding
*pb
2031 = lport_lookup_by_name(ctx
->ovnsb_idl
, port_name
);
2032 if (!pb
|| !pb
->chassis
) {
2035 if (strcmp(pb
->type
, "chassisredirect")) {
2036 return pb
->chassis
== chassis
;
2038 struct ovs_list
*gateway_chassis
=
2039 gateway_chassis_get_ordered(pb
, chassis_index
);
2040 bool active
= gateway_chassis_is_active(gateway_chassis
,
2043 gateway_chassis_destroy(gateway_chassis
);
2048 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2049 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2050 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2051 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2052 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2054 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2056 * The caller must call destroy_lport_addresses() and free(*lport). */
2058 extract_addresses_with_port(const char *addresses
,
2059 struct lport_addresses
*laddrs
,
2063 if (!extract_addresses(addresses
, laddrs
, &ofs
)) {
2065 } else if (ofs
>= strlen(addresses
)) {
2070 lexer_init(&lexer
, addresses
+ ofs
);
2073 if (lexer
.error
|| lexer
.token
.type
!= LEX_T_ID
2074 || !lexer_match_id(&lexer
, "is_chassis_resident")) {
2075 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2076 VLOG_INFO_RL(&rl
, "invalid syntax '%s' in address", addresses
);
2077 lexer_destroy(&lexer
);
2081 if (!lexer_match(&lexer
, LEX_T_LPAREN
)) {
2082 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2083 VLOG_INFO_RL(&rl
, "Syntax error: expecting '(' after "
2084 "'is_chassis_resident' in address '%s'", addresses
);
2085 lexer_destroy(&lexer
);
2089 if (lexer
.token
.type
!= LEX_T_STRING
) {
2090 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2092 "Syntax error: expecting quoted string after"
2093 " 'is_chassis_resident' in address '%s'", addresses
);
2094 lexer_destroy(&lexer
);
2098 *lport
= xstrdup(lexer
.token
.s
);
2101 if (!lexer_match(&lexer
, LEX_T_RPAREN
)) {
2102 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 1);
2103 VLOG_INFO_RL(&rl
, "Syntax error: expecting ')' after quoted string in "
2104 "'is_chassis_resident()' in address '%s'",
2106 lexer_destroy(&lexer
);
2110 lexer_destroy(&lexer
);
2115 consider_nat_address(struct controller_ctx
*ctx
,
2116 const char *nat_address
,
2117 const struct sbrec_port_binding
*pb
,
2118 struct sset
*nat_address_keys
,
2119 const struct sbrec_chassis
*chassis
,
2120 const struct chassis_index
*chassis_index
,
2121 struct sset
*active_tunnels
,
2122 struct shash
*nat_addresses
)
2124 struct lport_addresses
*laddrs
= xmalloc(sizeof *laddrs
);
2126 if (!extract_addresses_with_port(nat_address
, laddrs
, &lport
)
2127 || (!lport
&& !strcmp(pb
->type
, "patch"))
2128 || (lport
&& !pinctrl_is_chassis_resident(
2129 ctx
, chassis
, chassis_index
, active_tunnels
, lport
))) {
2130 destroy_lport_addresses(laddrs
);
2138 for (i
= 0; i
< laddrs
->n_ipv4_addrs
; i
++) {
2139 char *name
= xasprintf("%s-%s", pb
->logical_port
,
2140 laddrs
->ipv4_addrs
[i
].addr_s
);
2141 sset_add(nat_address_keys
, name
);
2144 shash_add(nat_addresses
, pb
->logical_port
, laddrs
);
2148 get_nat_addresses_and_keys(struct controller_ctx
*ctx
,
2149 struct sset
*nat_address_keys
,
2150 struct sset
*local_l3gw_ports
,
2151 const struct sbrec_chassis
*chassis
,
2152 const struct chassis_index
*chassis_index
,
2153 struct sset
*active_tunnels
,
2154 struct shash
*nat_addresses
)
2156 const char *gw_port
;
2157 SSET_FOR_EACH(gw_port
, local_l3gw_ports
) {
2158 const struct sbrec_port_binding
*pb
;
2160 pb
= lport_lookup_by_name(ctx
->ovnsb_idl
, gw_port
);
2165 if (pb
->n_nat_addresses
) {
2166 for (int i
= 0; i
< pb
->n_nat_addresses
; i
++) {
2167 consider_nat_address(ctx
, pb
->nat_addresses
[i
], pb
,
2168 nat_address_keys
, chassis
,
2169 chassis_index
, active_tunnels
,
2173 /* Continue to support options:nat-addresses for version
2175 const char *nat_addresses_options
= smap_get(&pb
->options
,
2177 if (nat_addresses_options
) {
2178 consider_nat_address(ctx
, nat_addresses_options
, pb
,
2179 nat_address_keys
, chassis
,
2180 chassis_index
, active_tunnels
,
2188 send_garp_wait(void)
2190 poll_timer_wait_until(send_garp_time
);
2194 send_garp_run(struct controller_ctx
*ctx
,
2195 const struct ovsrec_bridge
*br_int
,
2196 const struct sbrec_chassis
*chassis
,
2197 const struct chassis_index
*chassis_index
,
2198 struct hmap
*local_datapaths
,
2199 struct sset
*active_tunnels
)
2201 struct sset localnet_vifs
= SSET_INITIALIZER(&localnet_vifs
);
2202 struct sset local_l3gw_ports
= SSET_INITIALIZER(&local_l3gw_ports
);
2203 struct sset nat_ip_keys
= SSET_INITIALIZER(&nat_ip_keys
);
2204 struct simap localnet_ofports
= SIMAP_INITIALIZER(&localnet_ofports
);
2205 struct shash nat_addresses
;
2207 shash_init(&nat_addresses
);
2209 get_localnet_vifs_l3gwports(ctx
, br_int
, chassis
, local_datapaths
,
2210 &localnet_vifs
, &localnet_ofports
, &local_l3gw_ports
);
2212 get_nat_addresses_and_keys(ctx
, &nat_ip_keys
, &local_l3gw_ports
,
2213 chassis
, chassis_index
, active_tunnels
,
2215 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2216 struct shash_node
*iter
, *next
;
2217 SHASH_FOR_EACH_SAFE (iter
, next
, &send_garp_data
) {
2218 if (!sset_contains(&localnet_vifs
, iter
->name
) &&
2219 !sset_contains(&nat_ip_keys
, iter
->name
)) {
2220 send_garp_delete(iter
->name
);
2224 /* Update send_garp_data. */
2225 const char *iface_id
;
2226 SSET_FOR_EACH (iface_id
, &localnet_vifs
) {
2227 const struct sbrec_port_binding
*pb
;
2229 pb
= lport_lookup_by_name(ctx
->ovnsb_idl
, iface_id
);
2231 send_garp_update(pb
, &localnet_ofports
, local_datapaths
,
2236 /* Update send_garp_data for nat-addresses. */
2237 const char *gw_port
;
2238 SSET_FOR_EACH (gw_port
, &local_l3gw_ports
) {
2239 const struct sbrec_port_binding
*pb
;
2241 pb
= lport_lookup_by_name(ctx
->ovnsb_idl
, gw_port
);
2243 send_garp_update(pb
, &localnet_ofports
, local_datapaths
,
2248 /* Send GARPs, and update the next announcement. */
2249 long long int current_time
= time_msec();
2250 send_garp_time
= LLONG_MAX
;
2251 SHASH_FOR_EACH (iter
, &send_garp_data
) {
2252 long long int next_announce
= send_garp(iter
->data
, current_time
);
2253 if (send_garp_time
> next_announce
) {
2254 send_garp_time
= next_announce
;
2257 sset_destroy(&localnet_vifs
);
2258 sset_destroy(&local_l3gw_ports
);
2259 simap_destroy(&localnet_ofports
);
2261 SHASH_FOR_EACH_SAFE (iter
, next
, &nat_addresses
) {
2262 struct lport_addresses
*laddrs
= iter
->data
;
2263 destroy_lport_addresses(laddrs
);
2264 shash_delete(&nat_addresses
, iter
);
2267 shash_destroy(&nat_addresses
);
2269 sset_destroy(&nat_ip_keys
);
2273 reload_metadata(struct ofpbuf
*ofpacts
, const struct match
*md
)
2275 enum mf_field_id md_fields
[] = {
2276 #if FLOW_N_REGS == 16
2298 for (size_t i
= 0; i
< ARRAY_SIZE(md_fields
); i
++) {
2299 const struct mf_field
*field
= mf_from_id(md_fields
[i
]);
2300 if (!mf_is_all_wild(field
, &md
->wc
)) {
2301 union mf_value value
;
2302 mf_get_value(field
, &md
->flow
, &value
);
2303 ofpact_put_set_field(ofpacts
, field
, &value
, NULL
);
2309 pinctrl_handle_nd_na(const struct flow
*ip_flow
, const struct match
*md
,
2310 struct ofpbuf
*userdata
)
2312 /* This action only works for IPv6 ND packets, and the switch should only
2313 * send us ND packets this way, but check here just to be sure. */
2314 if (!is_nd(ip_flow
, NULL
)) {
2315 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2316 VLOG_WARN_RL(&rl
, "NA action on non-ND packet");
2320 uint64_t packet_stub
[128 / 8];
2321 struct dp_packet packet
;
2322 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2324 /* xxx These flags are not exactly correct. Look at section 7.2.4
2325 * xxx of RFC 4861. For example, we need to set ND_RSO_ROUTER for
2326 * xxx router's interfaces and ND_RSO_SOLICITED only if it was
2328 compose_nd_na(&packet
, ip_flow
->dl_dst
, ip_flow
->dl_src
,
2329 &ip_flow
->nd_target
, &ip_flow
->ipv6_src
,
2330 htonl(ND_RSO_SOLICITED
| ND_RSO_OVERRIDE
));
2332 /* Reload previous packet metadata and set actions from userdata. */
2333 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
2334 dp_packet_uninit(&packet
);
2338 pinctrl_handle_nd_ns(const struct flow
*ip_flow
, const struct match
*md
,
2339 struct ofpbuf
*userdata
)
2341 /* This action only works for IPv6 packets. */
2342 if (get_dl_type(ip_flow
) != htons(ETH_TYPE_IPV6
)) {
2343 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2344 VLOG_WARN_RL(&rl
, "NS action on non-IPv6 packet");
2348 uint64_t packet_stub
[128 / 8];
2349 struct dp_packet packet
;
2350 dp_packet_use_stub(&packet
, packet_stub
, sizeof packet_stub
);
2352 compose_nd_ns(&packet
, ip_flow
->dl_src
, &ip_flow
->ipv6_src
,
2353 &ip_flow
->ipv6_dst
);
2355 /* Reload previous packet metadata and set actions from userdata. */
2356 set_actions_and_enqueue_msg(&packet
, md
, userdata
);
2357 dp_packet_uninit(&packet
);
2361 pinctrl_handle_put_nd_ra_opts(
2362 const struct flow
*in_flow
, struct dp_packet
*pkt_in
,
2363 struct ofputil_packet_in
*pin
, struct ofpbuf
*userdata
,
2364 struct ofpbuf
*continuation
)
2366 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
2367 enum ofp_version version
= rconn_get_version(swconn
);
2368 enum ofputil_protocol proto
= ofputil_protocol_from_ofp_version(version
);
2369 struct dp_packet
*pkt_out_ptr
= NULL
;
2370 uint32_t success
= 0;
2372 /* Parse result field. */
2373 const struct mf_field
*f
;
2374 enum ofperr ofperr
= nx_pull_header(userdata
, NULL
, &f
, NULL
);
2376 VLOG_WARN_RL(&rl
, "bad result OXM (%s)", ofperr_to_string(ofperr
));
2380 /* Parse result offset. */
2381 ovs_be32
*ofsp
= ofpbuf_try_pull(userdata
, sizeof *ofsp
);
2383 VLOG_WARN_RL(&rl
, "offset not present in the userdata");
2387 /* Check that the result is valid and writable. */
2388 struct mf_subfield dst
= { .field
= f
, .ofs
= ntohl(*ofsp
), .n_bits
= 1 };
2389 ofperr
= mf_check_dst(&dst
, NULL
);
2391 VLOG_WARN_RL(&rl
, "bad result bit (%s)", ofperr_to_string(ofperr
));
2395 if (!userdata
->size
) {
2396 VLOG_WARN_RL(&rl
, "IPv6 ND RA options not present in the userdata");
2400 if (!is_icmpv6(in_flow
, NULL
) || in_flow
->tp_dst
!= htons(0) ||
2401 in_flow
->tp_src
!= htons(ND_ROUTER_SOLICIT
)) {
2402 VLOG_WARN_RL(&rl
, "put_nd_ra action on invalid or unsupported packet");
2406 size_t new_packet_size
= pkt_in
->l4_ofs
+ userdata
->size
;
2407 struct dp_packet pkt_out
;
2408 dp_packet_init(&pkt_out
, new_packet_size
);
2409 dp_packet_clear(&pkt_out
);
2410 dp_packet_prealloc_tailroom(&pkt_out
, new_packet_size
);
2411 pkt_out_ptr
= &pkt_out
;
2413 /* Copy L2 and L3 headers from pkt_in. */
2414 dp_packet_put(&pkt_out
, dp_packet_pull(pkt_in
, pkt_in
->l4_ofs
),
2417 pkt_out
.l2_5_ofs
= pkt_in
->l2_5_ofs
;
2418 pkt_out
.l2_pad_size
= pkt_in
->l2_pad_size
;
2419 pkt_out
.l3_ofs
= pkt_in
->l3_ofs
;
2420 pkt_out
.l4_ofs
= pkt_in
->l4_ofs
;
2422 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2423 dp_packet_put(&pkt_out
, userdata
->data
, userdata
->size
);
2425 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2426 struct ovs_16aligned_ip6_hdr
*nh
= dp_packet_l3(&pkt_out
);
2427 nh
->ip6_plen
= htons(userdata
->size
);
2428 struct ovs_ra_msg
*ra
= dp_packet_l4(&pkt_out
);
2429 ra
->icmph
.icmp6_cksum
= 0;
2430 uint32_t icmp_csum
= packet_csum_pseudoheader6(nh
);
2431 ra
->icmph
.icmp6_cksum
= csum_finish(csum_continue(
2432 icmp_csum
, ra
, userdata
->size
));
2433 pin
->packet
= dp_packet_data(&pkt_out
);
2434 pin
->packet_len
= dp_packet_size(&pkt_out
);
2439 union mf_subvalue sv
;
2440 sv
.u8_val
= success
;
2441 mf_write_subfield(&dst
, &sv
, &pin
->flow_metadata
);
2443 queue_msg(ofputil_encode_resume(pin
, continuation
, proto
));
2444 dp_packet_uninit(pkt_out_ptr
);