]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/controller/pinctrl.c
OVN: add icmp6{} action support
[mirror_ovs.git] / ovn / controller / pinctrl.c
1 /* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "pinctrl.h"
20
21 #include "coverage.h"
22 #include "csum.h"
23 #include "dirs.h"
24 #include "dp-packet.h"
25 #include "flow.h"
26 #include "gchassis.h"
27 #include "lport.h"
28 #include "nx-match.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
31 #include "lib/sset.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
39
40 #include "lib/dhcp.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
43 #include "ovn/lex.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
51 #include "timeval.h"
52 #include "vswitch-idl.h"
53 #include "lflow.h"
54
55 VLOG_DEFINE_THIS_MODULE(pinctrl);
56
57 /* OpenFlow connection to the switch. */
58 static struct rconn *swconn;
59
60 /* Last seen sequence number for 'swconn'. When this differs from
61 * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
62 static unsigned int conn_seq_no;
63
64 static void pinctrl_handle_put_mac_binding(const struct flow *md,
65 const struct flow *headers,
66 bool is_arp);
67 static void init_put_mac_bindings(void);
68 static void destroy_put_mac_bindings(void);
69 static void run_put_mac_bindings(struct controller_ctx *);
70 static void wait_put_mac_bindings(struct controller_ctx *);
71 static void flush_put_mac_bindings(void);
72
73 static void init_send_garps(void);
74 static void destroy_send_garps(void);
75 static void send_garp_wait(void);
76 static void send_garp_run(struct controller_ctx *ctx,
77 const struct ovsrec_bridge *,
78 const struct sbrec_chassis *,
79 const struct chassis_index *chassis_index,
80 struct hmap *local_datapaths,
81 struct sset *active_tunnels);
82 static void pinctrl_handle_nd_na(const struct flow *ip_flow,
83 const struct match *md,
84 struct ofpbuf *userdata);
85 static void reload_metadata(struct ofpbuf *ofpacts,
86 const struct match *md);
87 static void pinctrl_handle_put_nd_ra_opts(
88 const struct flow *ip_flow, struct dp_packet *pkt_in,
89 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
90 struct ofpbuf *continuation);
91 static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
92 const struct match *md,
93 struct ofpbuf *userdata);
94 static void init_ipv6_ras(void);
95 static void destroy_ipv6_ras(void);
96 static void ipv6_ra_wait(void);
97 static void send_ipv6_ras(const struct controller_ctx *,
98 struct hmap *local_datapaths);
99
100 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
101
102 void
103 pinctrl_init(void)
104 {
105 swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
106 conn_seq_no = 0;
107 init_put_mac_bindings();
108 init_send_garps();
109 init_ipv6_ras();
110 }
111
112 static ovs_be32
113 queue_msg(struct ofpbuf *msg)
114 {
115 const struct ofp_header *oh = msg->data;
116 ovs_be32 xid = oh->xid;
117
118 rconn_send(swconn, msg, NULL);
119 return xid;
120 }
121
122 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
123 static void
124 pinctrl_setup(void)
125 {
126 /* Fetch the switch configuration. The response later will allow us to
127 * change the miss_send_len to UINT16_MAX, so that we can enable
128 * asynchronous messages. */
129 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
130 rconn_get_version(swconn), 0));
131
132 /* Set a packet-in format that supports userdata. */
133 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
134 OFPUTIL_PACKET_IN_NXT2));
135 }
136
/* Encodes 'config' as a set-config request using the OpenFlow version
 * negotiated on 'swconn_' and queues it with queue_msg(). */
static void
set_switch_config(struct rconn *swconn_,
                  const struct ofputil_switch_config *config)
{
    queue_msg(ofputil_encode_set_config(config, rconn_get_version(swconn_)));
}
145
146 static void
147 set_actions_and_enqueue_msg(const struct dp_packet *packet,
148 const struct match *md,
149 struct ofpbuf *userdata)
150 {
151 /* Copy metadata from 'md' into the packet-out via "set_field"
152 * actions, then add actions from 'userdata'.
153 */
154 uint64_t ofpacts_stub[4096 / 8];
155 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
156 enum ofp_version version = rconn_get_version(swconn);
157
158 reload_metadata(&ofpacts, md);
159 enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
160 version, NULL, NULL,
161 &ofpacts);
162 if (error) {
163 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
164 VLOG_WARN_RL(&rl, "failed to parse actions from userdata (%s)",
165 ofperr_to_string(error));
166 ofpbuf_uninit(&ofpacts);
167 return;
168 }
169
170 struct ofputil_packet_out po = {
171 .packet = dp_packet_data(packet),
172 .packet_len = dp_packet_size(packet),
173 .buffer_id = UINT32_MAX,
174 .ofpacts = ofpacts.data,
175 .ofpacts_len = ofpacts.size,
176 };
177 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
178 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
179 queue_msg(ofputil_encode_packet_out(&po, proto));
180 ofpbuf_uninit(&ofpacts);
181 }
182
183 static void
184 pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
185 struct ofpbuf *userdata)
186 {
187 /* This action only works for IP packets, and the switch should only send
188 * us IP packets this way, but check here just to be sure. */
189 if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
190 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
191 VLOG_WARN_RL(&rl, "ARP action on non-IP packet (Ethertype %"PRIx16")",
192 ntohs(ip_flow->dl_type));
193 return;
194 }
195
196 /* Compose an ARP packet. */
197 uint64_t packet_stub[128 / 8];
198 struct dp_packet packet;
199 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
200 compose_arp__(&packet);
201
202 struct eth_header *eth = dp_packet_eth(&packet);
203 eth->eth_dst = ip_flow->dl_dst;
204 eth->eth_src = ip_flow->dl_src;
205
206 struct arp_eth_header *arp = dp_packet_l3(&packet);
207 arp->ar_op = htons(ARP_OP_REQUEST);
208 arp->ar_sha = ip_flow->dl_src;
209 put_16aligned_be32(&arp->ar_spa, ip_flow->nw_src);
210 arp->ar_tha = eth_addr_zero;
211 put_16aligned_be32(&arp->ar_tpa, ip_flow->nw_dst);
212
213 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
214 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
215 ip_flow->vlans[0].tci);
216 }
217
218 set_actions_and_enqueue_msg(&packet, md, userdata);
219 dp_packet_uninit(&packet);
220 }
221
222 static void
223 pinctrl_handle_icmp(const struct flow *ip_flow, struct dp_packet *pkt_in,
224 const struct match *md, struct ofpbuf *userdata)
225 {
226 /* This action only works for IP packets, and the switch should only send
227 * us IP packets this way, but check here just to be sure. */
228 if (ip_flow->dl_type != htons(ETH_TYPE_IP) &&
229 ip_flow->dl_type != htons(ETH_TYPE_IPV6)) {
230 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
231 VLOG_WARN_RL(&rl,
232 "ICMP action on non-IP packet (eth_type 0x%"PRIx16")",
233 ntohs(ip_flow->dl_type));
234 return;
235 }
236
237 uint64_t packet_stub[128 / 8];
238 struct dp_packet packet;
239
240 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
241 dp_packet_clear(&packet);
242 packet.packet_type = htonl(PT_ETH);
243
244 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
245 eh->eth_dst = ip_flow->dl_dst;
246 eh->eth_src = ip_flow->dl_src;
247
248 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IP)) {
249 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
250
251 eh->eth_type = htons(ETH_TYPE_IP);
252 dp_packet_set_l3(&packet, nh);
253 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
254 nh->ip_tot_len = htons(sizeof(struct ip_header) +
255 sizeof(struct icmp_header));
256 nh->ip_proto = IPPROTO_ICMP;
257 nh->ip_frag_off = htons(IP_DF);
258 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
259 ip_flow->nw_tos, 255);
260
261 struct icmp_header *ih = dp_packet_put_zeros(&packet, sizeof *ih);
262 dp_packet_set_l4(&packet, ih);
263 packet_set_icmp(&packet, ICMP4_DST_UNREACH, 1);
264 } else {
265 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
266 struct icmp6_error_header *ih;
267 uint32_t icmpv6_csum;
268
269 eh->eth_type = htons(ETH_TYPE_IPV6);
270 dp_packet_set_l3(&packet, nh);
271 nh->ip6_vfc = 0x60;
272 nh->ip6_nxt = IPPROTO_ICMPV6;
273 nh->ip6_plen = htons(sizeof(*nh) + ICMP6_ERROR_HEADER_LEN);
274 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
275 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
276
277 ih = dp_packet_put_zeros(&packet, sizeof *ih);
278 dp_packet_set_l4(&packet, ih);
279 ih->icmp6_base.icmp6_type = ICMP6_DST_UNREACH;
280 ih->icmp6_base.icmp6_code = 1;
281 ih->icmp6_base.icmp6_cksum = 0;
282
283 uint8_t *data = dp_packet_put_zeros(&packet, sizeof *nh);
284 memcpy(data, dp_packet_l3(pkt_in), sizeof(*nh));
285
286 icmpv6_csum = packet_csum_pseudoheader6(dp_packet_l3(&packet));
287 ih->icmp6_base.icmp6_cksum = csum_finish(
288 csum_continue(icmpv6_csum, ih,
289 sizeof(*nh) + ICMP6_ERROR_HEADER_LEN));
290 }
291
292 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
293 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
294 ip_flow->vlans[0].tci);
295 }
296
297 set_actions_and_enqueue_msg(&packet, md, userdata);
298 dp_packet_uninit(&packet);
299 }
300
301 static void
302 pinctrl_handle_tcp_reset(const struct flow *ip_flow, struct dp_packet *pkt_in,
303 const struct match *md, struct ofpbuf *userdata)
304 {
305 /* This action only works for TCP segments, and the switch should only send
306 * us TCP segments this way, but check here just to be sure. */
307 if (ip_flow->nw_proto != IPPROTO_TCP) {
308 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
309 VLOG_WARN_RL(&rl, "TCP_RESET action on non-TCP packet");
310 return;
311 }
312
313 uint64_t packet_stub[128 / 8];
314 struct dp_packet packet;
315
316 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
317 dp_packet_clear(&packet);
318 packet.packet_type = htonl(PT_ETH);
319
320 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
321 eh->eth_dst = ip_flow->dl_dst;
322 eh->eth_src = ip_flow->dl_src;
323
324 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IPV6)) {
325 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
326
327 eh->eth_type = htons(ETH_TYPE_IPV6);
328 dp_packet_set_l3(&packet, nh);
329 nh->ip6_vfc = 0x60;
330 nh->ip6_nxt = IPPROTO_TCP;
331 nh->ip6_plen = htons(TCP_HEADER_LEN);
332 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
333 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
334 } else {
335 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
336
337 eh->eth_type = htons(ETH_TYPE_IP);
338 dp_packet_set_l3(&packet, nh);
339 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
340 nh->ip_tot_len = htons(IP_HEADER_LEN + TCP_HEADER_LEN);
341 nh->ip_proto = IPPROTO_TCP;
342 nh->ip_frag_off = htons(IP_DF);
343 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
344 ip_flow->nw_tos, 255);
345 }
346
347 struct tcp_header *th = dp_packet_put_zeros(&packet, sizeof *th);
348 struct tcp_header *tcp_in = dp_packet_l4(pkt_in);
349 dp_packet_set_l4(&packet, th);
350 th->tcp_ctl = TCP_CTL(TCP_RST, 5);
351 if (ip_flow->tcp_flags & htons(TCP_ACK)) {
352 th->tcp_seq = tcp_in->tcp_ack;
353 } else {
354 uint32_t tcp_seq, ack_seq, tcp_len;
355
356 tcp_seq = ntohl(get_16aligned_be32(&tcp_in->tcp_seq));
357 tcp_len = TCP_OFFSET(tcp_in->tcp_ctl) * 4;
358 ack_seq = tcp_seq + dp_packet_l4_size(pkt_in) - tcp_len;
359 put_16aligned_be32(&th->tcp_ack, htonl(ack_seq));
360 put_16aligned_be32(&th->tcp_seq, 0);
361 }
362 packet_set_tcp_port(&packet, ip_flow->tp_dst, ip_flow->tp_src);
363
364 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
365 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
366 ip_flow->vlans[0].tci);
367 }
368
369 set_actions_and_enqueue_msg(&packet, md, userdata);
370 dp_packet_uninit(&packet);
371 }
372
373 static void
374 pinctrl_handle_put_dhcp_opts(
375 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
376 struct ofpbuf *userdata, struct ofpbuf *continuation)
377 {
378 enum ofp_version version = rconn_get_version(swconn);
379 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
380 struct dp_packet *pkt_out_ptr = NULL;
381 uint32_t success = 0;
382
383 /* Parse result field. */
384 const struct mf_field *f;
385 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
386 if (ofperr) {
387 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
388 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
389 goto exit;
390 }
391
392 /* Parse result offset and offer IP. */
393 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
394 ovs_be32 *offer_ip = ofpbuf_try_pull(userdata, sizeof *offer_ip);
395 if (!ofsp || !offer_ip) {
396 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
397 VLOG_WARN_RL(&rl, "offset or offer_ip not present in the userdata");
398 goto exit;
399 }
400
401 /* Check that the result is valid and writable. */
402 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
403 ofperr = mf_check_dst(&dst, NULL);
404 if (ofperr) {
405 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
406 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
407 goto exit;
408 }
409
410 if (!userdata->size) {
411 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
412 VLOG_WARN_RL(&rl, "DHCP options not present in the userdata");
413 goto exit;
414 }
415
416 /* Validate the DHCP request packet.
417 * Format of the DHCP packet is
418 * ------------------------------------------------------------------------
419 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
420 * ------------------------------------------------------------------------
421 */
422 if (dp_packet_l4_size(pkt_in) < (UDP_HEADER_LEN +
423 sizeof (struct dhcp_header) + sizeof(uint32_t) + 3)) {
424 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
425 VLOG_WARN_RL(&rl, "Invalid or incomplete DHCP packet recieved");
426 goto exit;
427 }
428
429 struct dhcp_header const *in_dhcp_data = dp_packet_get_udp_payload(pkt_in);
430 if (in_dhcp_data->op != DHCP_OP_REQUEST) {
431 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
432 VLOG_WARN_RL(&rl, "Invalid opcode in the DHCP packet : %d",
433 in_dhcp_data->op);
434 goto exit;
435 }
436
437 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
438 * options is the DHCP magic cookie followed by the actual DHCP options.
439 */
440 const uint8_t *in_dhcp_opt =
441 (const uint8_t *)dp_packet_get_udp_payload(pkt_in) +
442 sizeof (struct dhcp_header);
443
444 ovs_be32 magic_cookie = htonl(DHCP_MAGIC_COOKIE);
445 if (memcmp(in_dhcp_opt, &magic_cookie, sizeof(ovs_be32))) {
446 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
447 VLOG_WARN_RL(&rl, "DHCP magic cookie not present in the DHCP packet");
448 goto exit;
449 }
450
451 in_dhcp_opt += 4;
452 /* Check that the DHCP Message Type (opt 53) is present or not with
453 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST as the first
454 * DHCP option.
455 */
456 if (!(in_dhcp_opt[0] == DHCP_OPT_MSG_TYPE && in_dhcp_opt[1] == 1 && (
457 in_dhcp_opt[2] == DHCP_MSG_DISCOVER ||
458 in_dhcp_opt[2] == DHCP_MSG_REQUEST))) {
459 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
460 VLOG_WARN_RL(&rl, "Invalid DHCP message type : opt code = %d,"
461 " opt value = %d", in_dhcp_opt[0], in_dhcp_opt[2]);
462 goto exit;
463 }
464
465 uint8_t msg_type;
466 if (in_dhcp_opt[2] == DHCP_MSG_DISCOVER) {
467 msg_type = DHCP_MSG_OFFER;
468 } else {
469 msg_type = DHCP_MSG_ACK;
470 }
471
472 /* Frame the DHCP reply packet
473 * Total DHCP options length will be options stored in the userdata +
474 * 16 bytes.
475 *
476 * --------------------------------------------------------------
477 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
478 * --------------------------------------------------------------
479 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
480 * --------------------------------------------------------------
481 */
482 uint16_t new_l4_size = UDP_HEADER_LEN + DHCP_HEADER_LEN + \
483 userdata->size + 16;
484 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
485
486 struct dp_packet pkt_out;
487 dp_packet_init(&pkt_out, new_packet_size);
488 dp_packet_clear(&pkt_out);
489 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
490 pkt_out_ptr = &pkt_out;
491
492 /* Copy the L2 and L3 headers from the pkt_in as they would remain same*/
493 dp_packet_put(
494 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
495
496 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
497 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
498 pkt_out.l3_ofs = pkt_in->l3_ofs;
499 pkt_out.l4_ofs = pkt_in->l4_ofs;
500
501 struct udp_header *udp = dp_packet_put(
502 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
503
504 struct dhcp_header *dhcp_data = dp_packet_put(
505 &pkt_out, dp_packet_pull(pkt_in, DHCP_HEADER_LEN), DHCP_HEADER_LEN);
506 dhcp_data->op = DHCP_OP_REPLY;
507 dhcp_data->yiaddr = *offer_ip;
508 dp_packet_put(&pkt_out, &magic_cookie, sizeof(ovs_be32));
509
510 uint8_t *out_dhcp_opts = dp_packet_put_zeros(&pkt_out,
511 userdata->size + 12);
512 /* DHCP option - type */
513 out_dhcp_opts[0] = DHCP_OPT_MSG_TYPE;
514 out_dhcp_opts[1] = 1;
515 out_dhcp_opts[2] = msg_type;
516 out_dhcp_opts += 3;
517
518 memcpy(out_dhcp_opts, userdata->data, userdata->size);
519 out_dhcp_opts += userdata->size;
520 /* Padding */
521 out_dhcp_opts += 4;
522 /* End */
523 out_dhcp_opts[0] = DHCP_OPT_END;
524
525 udp->udp_len = htons(new_l4_size);
526
527 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
528 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs + new_l4_size);
529 udp->udp_csum = 0;
530 /* Checksum needs to be initialized to zero. */
531 out_ip->ip_csum = 0;
532 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
533
534 pin->packet = dp_packet_data(&pkt_out);
535 pin->packet_len = dp_packet_size(&pkt_out);
536
537 /* Log the response. */
538 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 40);
539 const struct eth_header *l2 = dp_packet_eth(&pkt_out);
540 VLOG_INFO_RL(&rl, "DHCP%s "ETH_ADDR_FMT" "IP_FMT"",
541 msg_type == DHCP_MSG_OFFER ? "OFFER" : "ACK",
542 ETH_ADDR_ARGS(l2->eth_src), IP_ARGS(*offer_ip));
543
544 success = 1;
545 exit:
546 if (!ofperr) {
547 union mf_subvalue sv;
548 sv.u8_val = success;
549 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
550 }
551 queue_msg(ofputil_encode_resume(pin, continuation, proto));
552 if (pkt_out_ptr) {
553 dp_packet_uninit(pkt_out_ptr);
554 }
555 }
556
557 static bool
558 compose_out_dhcpv6_opts(struct ofpbuf *userdata,
559 struct ofpbuf *out_dhcpv6_opts, ovs_be32 iaid)
560 {
561 while (userdata->size) {
562 struct dhcp_opt6_header *userdata_opt = ofpbuf_try_pull(
563 userdata, sizeof *userdata_opt);
564 if (!userdata_opt) {
565 return false;
566 }
567
568 size_t size = ntohs(userdata_opt->size);
569 uint8_t *userdata_opt_data = ofpbuf_try_pull(userdata, size);
570 if (!userdata_opt_data) {
571 return false;
572 }
573
574 switch (ntohs(userdata_opt->opt_code)) {
575 case DHCPV6_OPT_SERVER_ID_CODE:
576 {
577 /* The Server Identifier option carries a DUID
578 * identifying a server between a client and a server.
579 * See RFC 3315 Sec 9 and Sec 22.3.
580 *
581 * We use DUID Based on Link-layer Address [DUID-LL].
582 */
583
584 struct dhcpv6_opt_server_id *opt_server_id = ofpbuf_put_zeros(
585 out_dhcpv6_opts, sizeof *opt_server_id);
586
587 opt_server_id->opt.code = htons(DHCPV6_OPT_SERVER_ID_CODE);
588 opt_server_id->opt.len = htons(size + 4);
589 opt_server_id->duid_type = htons(DHCPV6_DUID_LL);
590 opt_server_id->hw_type = htons(DHCPV6_HW_TYPE_ETH);
591 memcpy(&opt_server_id->mac, userdata_opt_data,
592 sizeof(struct eth_addr));
593 break;
594 }
595
596 case DHCPV6_OPT_IA_ADDR_CODE:
597 {
598 if (size != sizeof(struct in6_addr)) {
599 return false;
600 }
601
602 /* IA Address option is used to specify IPv6 addresses associated
603 * with an IA_NA or IA_TA. The IA Address option must be
604 * encapsulated in the Options field of an IA_NA or IA_TA option.
605 *
606 * We will encapsulate the IA Address within the IA_NA option.
607 * Please see RFC 3315 section 22.5 and 22.6
608 */
609 struct dhcpv6_opt_ia_na *opt_ia_na = ofpbuf_put_zeros(
610 out_dhcpv6_opts, sizeof *opt_ia_na);
611 opt_ia_na->opt.code = htons(DHCPV6_OPT_IA_NA_CODE);
612 /* IA_NA length (in bytes)-
613 * IAID - 4
614 * T1 - 4
615 * T2 - 4
616 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
617 */
618 opt_ia_na->opt.len = htons(12 + sizeof(struct dhcpv6_opt_ia_addr));
619 opt_ia_na->iaid = iaid;
620 /* Set the lifetime of the address(es) to infinity */
621 opt_ia_na->t1 = OVS_BE32_MAX;
622 opt_ia_na->t2 = OVS_BE32_MAX;
623
624 struct dhcpv6_opt_ia_addr *opt_ia_addr = ofpbuf_put_zeros(
625 out_dhcpv6_opts, sizeof *opt_ia_addr);
626 opt_ia_addr->opt.code = htons(DHCPV6_OPT_IA_ADDR_CODE);
627 opt_ia_addr->opt.len = htons(size + 8);
628 memcpy(opt_ia_addr->ipv6.s6_addr, userdata_opt_data, size);
629 opt_ia_addr->t1 = OVS_BE32_MAX;
630 opt_ia_addr->t2 = OVS_BE32_MAX;
631 break;
632 }
633
634 case DHCPV6_OPT_DNS_SERVER_CODE:
635 {
636 struct dhcpv6_opt_header *opt_dns = ofpbuf_put_zeros(
637 out_dhcpv6_opts, sizeof *opt_dns);
638 opt_dns->code = htons(DHCPV6_OPT_DNS_SERVER_CODE);
639 opt_dns->len = htons(size);
640 ofpbuf_put(out_dhcpv6_opts, userdata_opt_data, size);
641 break;
642 }
643
644 case DHCPV6_OPT_DOMAIN_SEARCH_CODE:
645 {
646 struct dhcpv6_opt_header *opt_dsl = ofpbuf_put_zeros(
647 out_dhcpv6_opts, sizeof *opt_dsl);
648 opt_dsl->code = htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE);
649 opt_dsl->len = htons(size + 2);
650 uint8_t *data = ofpbuf_put_zeros(out_dhcpv6_opts, size + 2);
651 *data = size;
652 memcpy(data + 1, userdata_opt_data, size);
653 break;
654 }
655
656 default:
657 return false;
658 }
659 }
660 return true;
661 }
662
663 static void
664 pinctrl_handle_put_dhcpv6_opts(
665 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
666 struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
667 {
668 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
669 enum ofp_version version = rconn_get_version(swconn);
670 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
671 struct dp_packet *pkt_out_ptr = NULL;
672 uint32_t success = 0;
673
674 /* Parse result field. */
675 const struct mf_field *f;
676 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
677 if (ofperr) {
678 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
679 goto exit;
680 }
681
682 /* Parse result offset. */
683 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
684 if (!ofsp) {
685 VLOG_WARN_RL(&rl, "offset not present in the userdata");
686 goto exit;
687 }
688
689 /* Check that the result is valid and writable. */
690 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
691 ofperr = mf_check_dst(&dst, NULL);
692 if (ofperr) {
693 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
694 goto exit;
695 }
696
697 if (!userdata->size) {
698 VLOG_WARN_RL(&rl, "DHCPv6 options not present in the userdata");
699 goto exit;
700 }
701
702 struct udp_header *in_udp = dp_packet_l4(pkt_in);
703 const uint8_t *in_dhcpv6_data = dp_packet_get_udp_payload(pkt_in);
704 if (!in_udp || !in_dhcpv6_data) {
705 VLOG_WARN_RL(&rl, "truncated dhcpv6 packet");
706 goto exit;
707 }
708
709 uint8_t out_dhcpv6_msg_type;
710 switch(*in_dhcpv6_data) {
711 case DHCPV6_MSG_TYPE_SOLICIT:
712 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_ADVT;
713 break;
714
715 case DHCPV6_MSG_TYPE_REQUEST:
716 case DHCPV6_MSG_TYPE_CONFIRM:
717 case DHCPV6_MSG_TYPE_DECLINE:
718 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_REPLY;
719 break;
720
721 default:
722 /* Invalid or unsupported DHCPv6 message type */
723 goto exit;
724 }
725
726 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes). */
727 in_dhcpv6_data += 4;
728 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
729 * solicit/request/confirm packet and copy the same IAID in the Server's
730 * response. */
731 ovs_be32 iaid = 0;
732 struct dhcpv6_opt_header const *in_opt_client_id = NULL;
733 size_t udp_len = ntohs(in_udp->udp_len);
734 size_t l4_len = dp_packet_l4_size(pkt_in);
735 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
736 while (in_dhcpv6_data < end) {
737 struct dhcpv6_opt_header const *in_opt =
738 (struct dhcpv6_opt_header *)in_dhcpv6_data;
739 switch(ntohs(in_opt->code)) {
740 case DHCPV6_OPT_IA_NA_CODE:
741 {
742 struct dhcpv6_opt_ia_na *opt_ia_na = (
743 struct dhcpv6_opt_ia_na *)in_opt;
744 iaid = opt_ia_na->iaid;
745 break;
746 }
747
748 case DHCPV6_OPT_CLIENT_ID_CODE:
749 in_opt_client_id = in_opt;
750 break;
751
752 default:
753 break;
754 }
755 in_dhcpv6_data += sizeof *in_opt + ntohs(in_opt->len);
756 }
757
758 if (!in_opt_client_id) {
759 VLOG_WARN_RL(&rl, "DHCPv6 option - Client id not present in the "
760 " DHCPv6 packet");
761 goto exit;
762 }
763
764 if (!iaid) {
765 VLOG_WARN_RL(&rl, "DHCPv6 option - IA NA not present in the "
766 " DHCPv6 packet");
767 goto exit;
768 }
769
770 uint64_t out_ofpacts_dhcpv6_opts_stub[256 / 8];
771 struct ofpbuf out_dhcpv6_opts =
772 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub);
773
774 if (!compose_out_dhcpv6_opts(userdata, &out_dhcpv6_opts, iaid)) {
775 VLOG_WARN_RL(&rl, "Invalid userdata");
776 goto exit;
777 }
778
779 uint16_t new_l4_size
780 = (UDP_HEADER_LEN + 4 + sizeof *in_opt_client_id +
781 ntohs(in_opt_client_id->len) + out_dhcpv6_opts.size);
782 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
783
784 struct dp_packet pkt_out;
785 dp_packet_init(&pkt_out, new_packet_size);
786 dp_packet_clear(&pkt_out);
787 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
788 pkt_out_ptr = &pkt_out;
789
790 /* Copy L2 and L3 headers from pkt_in. */
791 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
792 pkt_in->l4_ofs);
793
794 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
795 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
796 pkt_out.l3_ofs = pkt_in->l3_ofs;
797 pkt_out.l4_ofs = pkt_in->l4_ofs;
798
799 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
800 * Need to preserve the transaction id in the DHCPv6 reply packet. */
801 struct udp_header *out_udp = dp_packet_put(
802 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
803 uint8_t *out_dhcpv6 = dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, 4), 4);
804
805 /* Set the proper DHCPv6 message type. */
806 *out_dhcpv6 = out_dhcpv6_msg_type;
807
808 /* Copy the Client Identifier. */
809 dp_packet_put(&pkt_out, in_opt_client_id,
810 sizeof *in_opt_client_id + ntohs(in_opt_client_id->len));
811
812 /* Copy the DHCPv6 Options. */
813 dp_packet_put(&pkt_out, out_dhcpv6_opts.data, out_dhcpv6_opts.size);
814 out_udp->udp_len = htons(new_l4_size);
815 out_udp->udp_csum = 0;
816
817 struct ovs_16aligned_ip6_hdr *out_ip6 = dp_packet_l3(&pkt_out);
818 out_ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = out_udp->udp_len;
819
820 uint32_t csum;
821 csum = packet_csum_pseudoheader6(dp_packet_l3(&pkt_out));
822 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
823 ((const unsigned char *)out_udp -
824 (const unsigned char *)dp_packet_eth(&pkt_out)));
825 out_udp->udp_csum = csum_finish(csum);
826 if (!out_udp->udp_csum) {
827 out_udp->udp_csum = htons(0xffff);
828 }
829
830 pin->packet = dp_packet_data(&pkt_out);
831 pin->packet_len = dp_packet_size(&pkt_out);
832 ofpbuf_uninit(&out_dhcpv6_opts);
833 success = 1;
834 exit:
835 if (!ofperr) {
836 union mf_subvalue sv;
837 sv.u8_val = success;
838 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
839 }
840 queue_msg(ofputil_encode_resume(pin, continuation, proto));
841 dp_packet_uninit(pkt_out_ptr);
842 }
843
844 static void
845 put_be16(struct ofpbuf *buf, ovs_be16 x)
846 {
847 ofpbuf_put(buf, &x, sizeof x);
848 }
849
850 static void
851 put_be32(struct ofpbuf *buf, ovs_be32 x)
852 {
853 ofpbuf_put(buf, &x, sizeof x);
854 }
855
856 static void
857 pinctrl_handle_dns_lookup(
858 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
859 struct ofpbuf *userdata, struct ofpbuf *continuation,
860 struct controller_ctx *ctx)
861 {
862 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
863 enum ofp_version version = rconn_get_version(swconn);
864 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
865 struct dp_packet *pkt_out_ptr = NULL;
866 uint32_t success = 0;
867
868 /* Parse result field. */
869 const struct mf_field *f;
870 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
871 if (ofperr) {
872 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
873 goto exit;
874 }
875
876 /* Parse result offset. */
877 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
878 if (!ofsp) {
879 VLOG_WARN_RL(&rl, "offset not present in the userdata");
880 goto exit;
881 }
882
883 /* Check that the result is valid and writable. */
884 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
885 ofperr = mf_check_dst(&dst, NULL);
886 if (ofperr) {
887 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
888 goto exit;
889 }
890
891 /* Extract the DNS header */
892 struct dns_header const *in_dns_header = dp_packet_get_udp_payload(pkt_in);
893 if (!in_dns_header) {
894 VLOG_WARN_RL(&rl, "truncated dns packet");
895 goto exit;
896 }
897
898 /* Check if it is DNS request or not */
899 if (in_dns_header->lo_flag & 0x80) {
900 /* It's a DNS response packet which we are not interested in */
901 goto exit;
902 }
903
904 /* Check if at least one query request is present */
905 if (!in_dns_header->qdcount) {
906 goto exit;
907 }
908
909 struct udp_header *in_udp = dp_packet_l4(pkt_in);
910 size_t udp_len = ntohs(in_udp->udp_len);
911 size_t l4_len = dp_packet_l4_size(pkt_in);
912 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
913 uint8_t *in_dns_data = (uint8_t *)(in_dns_header + 1);
914 uint8_t *in_queryname = in_dns_data;
915 uint8_t idx = 0;
916 struct ds query_name;
917 ds_init(&query_name);
918 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
919 * encoded as (in hex) - 03 77 77 77 03 6f 76 6e 03 6f 72 67 00.
920 */
921 while ((in_dns_data + idx) < end && in_dns_data[idx]) {
922 uint8_t label_len = in_dns_data[idx++];
923 if (in_dns_data + idx + label_len > end) {
924 ds_destroy(&query_name);
925 goto exit;
926 }
927 ds_put_buffer(&query_name, (const char *) in_dns_data + idx, label_len);
928 idx += label_len;
929 ds_put_char(&query_name, '.');
930 }
931
932 idx++;
933 ds_chomp(&query_name, '.');
934 in_dns_data += idx;
935
936 /* Query should have TYPE and CLASS fields */
937 if (in_dns_data + (2 * sizeof(ovs_be16)) > end) {
938 ds_destroy(&query_name);
939 goto exit;
940 }
941
942 uint16_t query_type = ntohs(*ALIGNED_CAST(const ovs_be16 *, in_dns_data));
943 /* Supported query types - A, AAAA and ANY */
944 if (!(query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_AAAA
945 || query_type == DNS_QUERY_TYPE_ANY)) {
946 ds_destroy(&query_name);
947 goto exit;
948 }
949
950 uint64_t dp_key = ntohll(pin->flow_metadata.flow.metadata);
951 const struct sbrec_dns *sbrec_dns;
952 const char *answer_ips = NULL;
953 SBREC_DNS_FOR_EACH(sbrec_dns, ctx->ovnsb_idl) {
954 for (size_t i = 0; i < sbrec_dns->n_datapaths; i++) {
955 if (sbrec_dns->datapaths[i]->tunnel_key == dp_key) {
956 answer_ips = smap_get(&sbrec_dns->records,
957 ds_cstr(&query_name));
958 if (answer_ips) {
959 break;
960 }
961 }
962 }
963
964 if (answer_ips) {
965 break;
966 }
967 }
968
969 ds_destroy(&query_name);
970 if (!answer_ips) {
971 goto exit;
972 }
973
974 struct lport_addresses ip_addrs;
975 if (!extract_ip_addresses(answer_ips, &ip_addrs)) {
976 goto exit;
977 }
978
979 uint16_t ancount = 0;
980 uint64_t dns_ans_stub[128 / 8];
981 struct ofpbuf dns_answer = OFPBUF_STUB_INITIALIZER(dns_ans_stub);
982
983 if (query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_ANY) {
984 for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
985 /* Copy the answer section */
986 /* Format of the answer section is
987 * - NAME -> The domain name
988 * - TYPE -> 2 octets containing one of the RR type codes
989 * - CLASS -> 2 octets which specify the class of the data
990 * in the RDATA field.
991 * - TTL -> 32 bit unsigned int specifying the time
992 * interval (in secs) that the resource record
993 * may be cached before it should be discarded.
994 * - RDLENGTH -> 16 bit integer specifying the length of the
995 * RDATA field.
996 * - RDATA -> a variable length string of octets that
997 * describes the resource. In our case it will
998 * be IP address of the domain name.
999 */
1000 ofpbuf_put(&dns_answer, in_queryname, idx);
1001 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_A));
1002 put_be16(&dns_answer, htons(DNS_CLASS_IN));
1003 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
1004 put_be16(&dns_answer, htons(sizeof(ovs_be32)));
1005 put_be32(&dns_answer, ip_addrs.ipv4_addrs[i].addr);
1006 ancount++;
1007 }
1008 }
1009
1010 if (query_type == DNS_QUERY_TYPE_AAAA ||
1011 query_type == DNS_QUERY_TYPE_ANY) {
1012 for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
1013 ofpbuf_put(&dns_answer, in_queryname, idx);
1014 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_AAAA));
1015 put_be16(&dns_answer, htons(DNS_CLASS_IN));
1016 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
1017 const struct in6_addr *ip6 = &ip_addrs.ipv6_addrs[i].addr;
1018 put_be16(&dns_answer, htons(sizeof *ip6));
1019 ofpbuf_put(&dns_answer, ip6, sizeof *ip6);
1020 ancount++;
1021 }
1022 }
1023
1024 destroy_lport_addresses(&ip_addrs);
1025
1026 if (!ancount) {
1027 ofpbuf_uninit(&dns_answer);
1028 goto exit;
1029 }
1030
1031 uint16_t new_l4_size = ntohs(in_udp->udp_len) + dns_answer.size;
1032 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
1033 struct dp_packet pkt_out;
1034 dp_packet_init(&pkt_out, new_packet_size);
1035 dp_packet_clear(&pkt_out);
1036 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
1037 pkt_out_ptr = &pkt_out;
1038
1039 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1040 dp_packet_put(
1041 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
1042
1043 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
1044 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
1045 pkt_out.l3_ofs = pkt_in->l3_ofs;
1046 pkt_out.l4_ofs = pkt_in->l4_ofs;
1047
1048 struct udp_header *out_udp = dp_packet_put(
1049 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
1050
1051 /* Copy the DNS header. */
1052 struct dns_header *out_dns_header = dp_packet_put(
1053 &pkt_out, dp_packet_pull(pkt_in, sizeof *out_dns_header),
1054 sizeof *out_dns_header);
1055
1056 /* Set the response bit to 1 in the flags. */
1057 out_dns_header->lo_flag |= 0x80;
1058
1059 /* Set the answer RR. */
1060 out_dns_header->ancount = htons(ancount);
1061
1062 /* Copy the Query section. */
1063 dp_packet_put(&pkt_out, dp_packet_data(pkt_in), dp_packet_size(pkt_in));
1064
1065 /* Copy the answer sections. */
1066 dp_packet_put(&pkt_out, dns_answer.data, dns_answer.size);
1067 ofpbuf_uninit(&dns_answer);
1068
1069 out_udp->udp_len = htons(new_l4_size);
1070 out_udp->udp_csum = 0;
1071
1072 struct eth_header *eth = dp_packet_data(&pkt_out);
1073 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1074 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
1075 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs
1076 + new_l4_size);
1077 /* Checksum needs to be initialized to zero. */
1078 out_ip->ip_csum = 0;
1079 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
1080 } else {
1081 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
1082 nh->ip6_plen = htons(new_l4_size);
1083
1084 /* IPv6 needs UDP checksum calculated */
1085 uint32_t csum;
1086 csum = packet_csum_pseudoheader6(nh);
1087 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
1088 ((const unsigned char *)out_udp -
1089 (const unsigned char *)eth));
1090 out_udp->udp_csum = csum_finish(csum);
1091 if (!out_udp->udp_csum) {
1092 out_udp->udp_csum = htons(0xffff);
1093 }
1094 }
1095
1096 pin->packet = dp_packet_data(&pkt_out);
1097 pin->packet_len = dp_packet_size(&pkt_out);
1098
1099 success = 1;
1100 exit:
1101 if (!ofperr) {
1102 union mf_subvalue sv;
1103 sv.u8_val = success;
1104 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
1105 }
1106 queue_msg(ofputil_encode_resume(pin, continuation, proto));
1107 dp_packet_uninit(pkt_out_ptr);
1108 }
1109
1110 static void
1111 process_packet_in(const struct ofp_header *msg, struct controller_ctx *ctx)
1112 {
1113 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1114
1115 struct ofputil_packet_in pin;
1116 struct ofpbuf continuation;
1117 enum ofperr error = ofputil_decode_packet_in(msg, true, NULL, NULL, &pin,
1118 NULL, NULL, &continuation);
1119
1120 if (error) {
1121 VLOG_WARN_RL(&rl, "error decoding packet-in: %s",
1122 ofperr_to_string(error));
1123 return;
1124 }
1125 if (pin.reason != OFPR_ACTION) {
1126 return;
1127 }
1128
1129 struct ofpbuf userdata = ofpbuf_const_initializer(pin.userdata,
1130 pin.userdata_len);
1131 const struct action_header *ah = ofpbuf_pull(&userdata, sizeof *ah);
1132 if (!ah) {
1133 VLOG_WARN_RL(&rl, "packet-in userdata lacks action header");
1134 return;
1135 }
1136
1137 struct dp_packet packet;
1138 dp_packet_use_const(&packet, pin.packet, pin.packet_len);
1139 struct flow headers;
1140 flow_extract(&packet, &headers);
1141
1142 switch (ntohl(ah->opcode)) {
1143 case ACTION_OPCODE_ARP:
1144 pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
1145 break;
1146
1147 case ACTION_OPCODE_PUT_ARP:
1148 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1149 true);
1150 break;
1151
1152 case ACTION_OPCODE_PUT_DHCP_OPTS:
1153 pinctrl_handle_put_dhcp_opts(&packet, &pin, &userdata, &continuation);
1154 break;
1155
1156 case ACTION_OPCODE_ND_NA:
1157 pinctrl_handle_nd_na(&headers, &pin.flow_metadata, &userdata);
1158 break;
1159
1160 case ACTION_OPCODE_PUT_ND:
1161 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1162 false);
1163 break;
1164
1165 case ACTION_OPCODE_PUT_DHCPV6_OPTS:
1166 pinctrl_handle_put_dhcpv6_opts(&packet, &pin, &userdata,
1167 &continuation);
1168 break;
1169
1170 case ACTION_OPCODE_DNS_LOOKUP:
1171 pinctrl_handle_dns_lookup(&packet, &pin, &userdata, &continuation, ctx);
1172 break;
1173
1174 case ACTION_OPCODE_LOG:
1175 handle_acl_log(&headers, &userdata);
1176 break;
1177
1178 case ACTION_OPCODE_PUT_ND_RA_OPTS:
1179 pinctrl_handle_put_nd_ra_opts(&headers, &packet, &pin, &userdata,
1180 &continuation);
1181 break;
1182
1183 case ACTION_OPCODE_ND_NS:
1184 pinctrl_handle_nd_ns(&headers, &pin.flow_metadata, &userdata);
1185 break;
1186
1187 case ACTION_OPCODE_ICMP:
1188 pinctrl_handle_icmp(&headers, &packet, &pin.flow_metadata,
1189 &userdata);
1190 break;
1191
1192 case ACTION_OPCODE_TCP_RESET:
1193 pinctrl_handle_tcp_reset(&headers, &packet, &pin.flow_metadata,
1194 &userdata);
1195 break;
1196
1197 default:
1198 VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
1199 ntohl(ah->opcode));
1200 break;
1201 }
1202 }
1203
1204 static void
1205 pinctrl_recv(const struct ofp_header *oh, enum ofptype type,
1206 struct controller_ctx *ctx)
1207 {
1208 if (type == OFPTYPE_ECHO_REQUEST) {
1209 queue_msg(ofputil_encode_echo_reply(oh));
1210 } else if (type == OFPTYPE_GET_CONFIG_REPLY) {
1211 /* Enable asynchronous messages */
1212 struct ofputil_switch_config config;
1213
1214 ofputil_decode_get_config_reply(oh, &config);
1215 config.miss_send_len = UINT16_MAX;
1216 set_switch_config(swconn, &config);
1217 } else if (type == OFPTYPE_PACKET_IN) {
1218 process_packet_in(oh, ctx);
1219 } else {
1220 if (VLOG_IS_DBG_ENABLED()) {
1221 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
1222
1223 char *s = ofp_to_string(oh, ntohs(oh->length), NULL, NULL, 2);
1224
1225 VLOG_DBG_RL(&rl, "OpenFlow packet ignored: %s", s);
1226 free(s);
1227 }
1228 }
1229 }
1230
1231 void
1232 pinctrl_run(struct controller_ctx *ctx,
1233 const struct ovsrec_bridge *br_int,
1234 const struct sbrec_chassis *chassis,
1235 const struct chassis_index *chassis_index,
1236 struct hmap *local_datapaths,
1237 struct sset *active_tunnels)
1238 {
1239 char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
1240 if (strcmp(target, rconn_get_target(swconn))) {
1241 VLOG_INFO("%s: connecting to switch", target);
1242 rconn_connect(swconn, target, target);
1243 }
1244 free(target);
1245
1246 rconn_run(swconn);
1247
1248 if (!rconn_is_connected(swconn)) {
1249 return;
1250 }
1251
1252 if (conn_seq_no != rconn_get_connection_seqno(swconn)) {
1253 pinctrl_setup();
1254 conn_seq_no = rconn_get_connection_seqno(swconn);
1255 flush_put_mac_bindings();
1256 }
1257
1258 /* Process a limited number of messages per call. */
1259 for (int i = 0; i < 50; i++) {
1260 struct ofpbuf *msg = rconn_recv(swconn);
1261 if (!msg) {
1262 break;
1263 }
1264
1265 const struct ofp_header *oh = msg->data;
1266 enum ofptype type;
1267
1268 ofptype_decode(&type, oh);
1269 pinctrl_recv(oh, type, ctx);
1270 ofpbuf_delete(msg);
1271 }
1272
1273 run_put_mac_bindings(ctx);
1274 send_garp_run(ctx, br_int, chassis, chassis_index, local_datapaths,
1275 active_tunnels);
1276 send_ipv6_ras(ctx, local_datapaths);
1277 }
1278
1279 /* Table of ipv6_ra_state structures, keyed on logical port name */
1280 static struct shash ipv6_ras;
1281
1282 /* Next IPv6 RA in ms. */
1283 static long long int send_ipv6_ra_time;
1284
1285 struct ipv6_ra_config {
1286 time_t min_interval;
1287 time_t max_interval;
1288 struct eth_addr eth_src;
1289 struct eth_addr eth_dst;
1290 struct in6_addr ipv6_src;
1291 struct in6_addr ipv6_dst;
1292 int32_t mtu;
1293 uint8_t mo_flags; /* Managed/Other flags for RAs */
1294 uint8_t la_flags; /* On-link/autonomous flags for address prefixes */
1295 struct lport_addresses prefixes;
1296 };
1297
/* Periodic router advertisement state for one logical port; the value type
 * stored in the 'ipv6_ras' table. */
struct ipv6_ra_state {
    long long int next_announce;    /* Compared against time_msec(). */
    struct ipv6_ra_config *config;  /* Freed by ipv6_ra_delete(). */
    int64_t port_key;               /* Tunnel key of the peer port. */
    int64_t metadata;               /* Tunnel key of the peer's datapath. */
    bool delete_me;                 /* Sweep mark used by send_ipv6_ras(). */
};
1305
1306 static void
1307 init_ipv6_ras(void)
1308 {
1309 shash_init(&ipv6_ras);
1310 send_ipv6_ra_time = LLONG_MAX;
1311 }
1312
1313 static void
1314 ipv6_ra_config_delete(struct ipv6_ra_config *config)
1315 {
1316 if (config) {
1317 destroy_lport_addresses(&config->prefixes);
1318 free(config);
1319 }
1320 }
1321
1322 static void
1323 ipv6_ra_delete(struct ipv6_ra_state *ra)
1324 {
1325 if (ra) {
1326 ipv6_ra_config_delete(ra->config);
1327 free(ra);
1328 }
1329 }
1330
1331 static void
1332 destroy_ipv6_ras(void)
1333 {
1334 struct shash_node *iter, *next;
1335 SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) {
1336 struct ipv6_ra_state *ra = iter->data;
1337 ipv6_ra_delete(ra);
1338 shash_delete(&ipv6_ras, iter);
1339 }
1340 shash_destroy(&ipv6_ras);
1341 }
1342
1343 static struct ipv6_ra_config *
1344 ipv6_ra_update_config(const struct sbrec_port_binding *pb)
1345 {
1346 struct ipv6_ra_config *config;
1347
1348 config = xzalloc(sizeof *config);
1349
1350 config->max_interval = smap_get_int(&pb->options, "ipv6_ra_max_interval",
1351 ND_RA_MAX_INTERVAL_DEFAULT);
1352 config->min_interval = smap_get_int(&pb->options, "ipv6_ra_min_interval",
1353 nd_ra_min_interval_default(config->max_interval));
1354 config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
1355 config->la_flags = ND_PREFIX_ON_LINK;
1356
1357 const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode");
1358 if (!address_mode) {
1359 VLOG_WARN("No address mode specified");
1360 goto fail;
1361 }
1362 if (!strcmp(address_mode, "dhcpv6_stateless")) {
1363 config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG;
1364 } else if (!strcmp(address_mode, "dhcpv6_stateful")) {
1365 config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG;
1366 } else if (!strcmp(address_mode, "slaac")) {
1367 config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS;
1368 } else {
1369 VLOG_WARN("Invalid address mode %s", address_mode);
1370 goto fail;
1371 }
1372
1373 const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes");
1374 if (prefixes && !extract_ip_addresses(prefixes, &config->prefixes)) {
1375 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes);
1376 goto fail;
1377 }
1378
1379 /* All nodes multicast addresses */
1380 config->eth_dst = (struct eth_addr) ETH_ADDR_C(33,33,00,00,00,01);
1381 ipv6_parse("ff02::1", &config->ipv6_dst);
1382
1383 const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth");
1384 if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src)) {
1385 VLOG_WARN("Invalid ethernet source %s", eth_addr);
1386 goto fail;
1387 }
1388 const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr");
1389 if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) {
1390 VLOG_WARN("Invalid IP source %s", ip_addr);
1391 goto fail;
1392 }
1393
1394 return config;
1395
1396 fail:
1397 ipv6_ra_config_delete(config);
1398 return NULL;
1399 }
1400
/* Returns the time, on the time_msec() clock, at which the next RA should
 * be sent: now plus a random delay between 'min_interval' and
 * 'max_interval', both given in seconds.
 *
 * When 'max_interval' does not exceed 'min_interval' there is nothing to
 * randomize and the delay is exactly 'min_interval'. */
static long long int
ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
{
    long long int min_interval_ms = min_interval * 1000LL;
    long long int max_interval_ms = max_interval * 1000LL;
    long long int spread_ms = max_interval_ms - min_interval_ms;
    long long int next = time_msec() + min_interval_ms;

    /* random_range() must not be given a zero or negative bound (OVS
     * implements it as a modulo by its argument), and equal intervals are a
     * legitimate configuration, so only randomize a positive spread. */
    if (spread_ms > 0) {
        next += random_range(spread_ms);
    }
    return next;
}
1410
1411 static void
1412 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
1413 struct ofpbuf *ofpacts)
1414 {
1415 struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
1416 mf_from_id(dst), NULL,
1417 NULL);
1418 ovs_be64 n_value = htonll(value);
1419 bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
1420 bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
1421 }
1422
1423 static long long int
1424 ipv6_ra_send(struct ipv6_ra_state *ra)
1425 {
1426 if (time_msec() < ra->next_announce) {
1427 return ra->next_announce;
1428 }
1429
1430 uint64_t packet_stub[128 / 8];
1431 struct dp_packet packet;
1432 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1433 compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst,
1434 &ra->config->ipv6_src, &ra->config->ipv6_dst,
1435 255, ra->config->mo_flags, 0, 0, 0, ra->config->mtu);
1436
1437 for (int i = 0; i < ra->config->prefixes.n_ipv6_addrs; i++) {
1438 ovs_be128 addr;
1439 memcpy(&addr, &ra->config->prefixes.ipv6_addrs[i].addr, sizeof addr);
1440 packet_put_ra_prefix_opt(&packet,
1441 ra->config->prefixes.ipv6_addrs[i].plen,
1442 ra->config->la_flags, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME),
1443 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr);
1444 }
1445
1446 uint64_t ofpacts_stub[4096 / 8];
1447 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1448
1449 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1450 uint32_t dp_key = ra->metadata;
1451 uint32_t port_key = ra->port_key;
1452 put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
1453 put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
1454 put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
1455 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
1456 resubmit->in_port = OFPP_CONTROLLER;
1457 resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
1458
1459 struct ofputil_packet_out po = {
1460 .packet = dp_packet_data(&packet),
1461 .packet_len = dp_packet_size(&packet),
1462 .buffer_id = UINT32_MAX,
1463 .ofpacts = ofpacts.data,
1464 .ofpacts_len = ofpacts.size,
1465 };
1466
1467 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1468 enum ofp_version version = rconn_get_version(swconn);
1469 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1470 queue_msg(ofputil_encode_packet_out(&po, proto));
1471 dp_packet_uninit(&packet);
1472 ofpbuf_uninit(&ofpacts);
1473
1474 ra->next_announce = ipv6_ra_calc_next_announce(ra->config->min_interval,
1475 ra->config->max_interval);
1476
1477 return ra->next_announce;
1478 }
1479
1480 static void
1481 ipv6_ra_wait(void)
1482 {
1483 poll_timer_wait_until(send_ipv6_ra_time);
1484 }
1485
1486 static void
1487 send_ipv6_ras(const struct controller_ctx *ctx, struct hmap *local_datapaths)
1488 {
1489 struct shash_node *iter, *iter_next;
1490
1491 send_ipv6_ra_time = LLONG_MAX;
1492
1493 SHASH_FOR_EACH (iter, &ipv6_ras) {
1494 struct ipv6_ra_state *ra = iter->data;
1495 ra->delete_me = true;
1496 }
1497
1498 const struct local_datapath *ld;
1499 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
1500 struct sbrec_port_binding *lpval;
1501 const struct sbrec_port_binding *pb;
1502 struct ovsdb_idl_index_cursor cursor;
1503
1504 lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
1505 &sbrec_table_port_binding);
1506 sbrec_port_binding_index_set_datapath(lpval, ld->datapath);
1507 ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, &sbrec_table_port_binding,
1508 "lport-by-datapath", &cursor);
1509 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
1510 if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic", false)) {
1511 continue;
1512 }
1513
1514 const char *peer_s = smap_get(&pb->options, "peer");
1515 if (!peer_s) {
1516 continue;
1517 }
1518
1519 const struct sbrec_port_binding *peer
1520 = lport_lookup_by_name(ctx->ovnsb_idl, peer_s);
1521 if (!peer) {
1522 continue;
1523 }
1524
1525 struct ipv6_ra_config *config = ipv6_ra_update_config(pb);
1526 if (!config) {
1527 continue;
1528 }
1529
1530 struct ipv6_ra_state *ra
1531 = shash_find_data(&ipv6_ras, pb->logical_port);
1532 if (!ra) {
1533 ra = xzalloc(sizeof *ra);
1534 ra->config = config;
1535 ra->next_announce = ipv6_ra_calc_next_announce(
1536 ra->config->min_interval,
1537 ra->config->max_interval);
1538 shash_add(&ipv6_ras, pb->logical_port, ra);
1539 } else {
1540 ipv6_ra_config_delete(ra->config);
1541 ra->config = config;
1542 }
1543
1544 /* Peer is the logical switch port that the logical
1545 * router port is connected to. The RA is injected
1546 * into that logical switch port.
1547 */
1548 ra->port_key = peer->tunnel_key;
1549 ra->metadata = peer->datapath->tunnel_key;
1550 ra->delete_me = false;
1551
1552 long long int next_ra = ipv6_ra_send(ra);
1553 if (send_ipv6_ra_time > next_ra) {
1554 send_ipv6_ra_time = next_ra;
1555 }
1556 }
1557 sbrec_port_binding_index_destroy_row(lpval);
1558 }
1559
1560 /* Remove those that are no longer in the SB database */
1561 SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) {
1562 struct ipv6_ra_state *ra = iter->data;
1563 if (ra->delete_me) {
1564 shash_delete(&ipv6_ras, iter);
1565 ipv6_ra_delete(ra);
1566 }
1567 }
1568 }
1569
1570 void
1571 pinctrl_wait(struct controller_ctx *ctx)
1572 {
1573 wait_put_mac_bindings(ctx);
1574 rconn_run_wait(swconn);
1575 rconn_recv_wait(swconn);
1576 send_garp_wait();
1577 ipv6_ra_wait();
1578 }
1579
1580 void
1581 pinctrl_destroy(void)
1582 {
1583 rconn_destroy(swconn);
1584 destroy_put_mac_bindings();
1585 destroy_send_garps();
1586 destroy_ipv6_ras();
1587 }
1588 \f
1589 /* Implementation of the "put_arp" and "put_nd" OVN actions. These
1590 * actions send a packet to ovn-controller, using the flow as an API
1591 * (see actions.h for details). This code implements the actions by
1592 * updating the MAC_Binding table in the southbound database.
1593 *
1594 * This code could be a lot simpler if the database could always be updated,
1595 * but in fact we can only update it when ctx->ovnsb_idl_txn is nonnull. Thus,
1596 * we buffer up a few put_mac_bindings (but we don't keep them longer
1597 * than 1 second) and apply them whenever a database transaction is
1598 * available. */
1599
1600 /* Buffered "put_mac_binding" operation. */
1601 struct put_mac_binding {
1602 struct hmap_node hmap_node; /* In 'put_mac_bindings'. */
1603
1604 long long int timestamp; /* In milliseconds. */
1605
1606 /* Key. */
1607 uint32_t dp_key;
1608 uint32_t port_key;
1609 char ip_s[INET6_ADDRSTRLEN + 1];
1610
1611 /* Value. */
1612 struct eth_addr mac;
1613 };
1614
1615 /* Contains "struct put_mac_binding"s. */
1616 static struct hmap put_mac_bindings;
1617
1618 static void
1619 init_put_mac_bindings(void)
1620 {
1621 hmap_init(&put_mac_bindings);
1622 }
1623
1624 static void
1625 destroy_put_mac_bindings(void)
1626 {
1627 flush_put_mac_bindings();
1628 hmap_destroy(&put_mac_bindings);
1629 }
1630
1631 static struct put_mac_binding *
1632 pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key,
1633 const char *ip_s, uint32_t hash)
1634 {
1635 struct put_mac_binding *pa;
1636 HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) {
1637 if (pa->dp_key == dp_key
1638 && pa->port_key == port_key
1639 && !strcmp(pa->ip_s, ip_s)) {
1640 return pa;
1641 }
1642 }
1643 return NULL;
1644 }
1645
1646 static void
1647 pinctrl_handle_put_mac_binding(const struct flow *md,
1648 const struct flow *headers, bool is_arp)
1649 {
1650 uint32_t dp_key = ntohll(md->metadata);
1651 uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
1652 char ip_s[INET6_ADDRSTRLEN];
1653
1654 if (is_arp) {
1655 ovs_be32 ip = htonl(md->regs[0]);
1656 inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s));
1657 } else {
1658 ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
1659 inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s));
1660 }
1661 uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
1662 struct put_mac_binding *pmb
1663 = pinctrl_find_put_mac_binding(dp_key, port_key, ip_s, hash);
1664 if (!pmb) {
1665 if (hmap_count(&put_mac_bindings) >= 1000) {
1666 COVERAGE_INC(pinctrl_drop_put_mac_binding);
1667 return;
1668 }
1669
1670 pmb = xmalloc(sizeof *pmb);
1671 hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash);
1672 pmb->dp_key = dp_key;
1673 pmb->port_key = port_key;
1674 ovs_strlcpy_arrays(pmb->ip_s, ip_s);
1675 }
1676 pmb->timestamp = time_msec();
1677 pmb->mac = headers->dl_src;
1678 }
1679
1680 static void
1681 run_put_mac_binding(struct controller_ctx *ctx,
1682 const struct put_mac_binding *pmb)
1683 {
1684 if (time_msec() > pmb->timestamp + 1000) {
1685 return;
1686 }
1687
1688 /* Convert logical datapath and logical port key into lport. */
1689 const struct sbrec_port_binding *pb
1690 = lport_lookup_by_key(ctx->ovnsb_idl, pmb->dp_key, pmb->port_key);
1691 if (!pb) {
1692 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1693
1694 VLOG_WARN_RL(&rl, "unknown logical port with datapath %"PRIu32" "
1695 "and port %"PRIu32, pmb->dp_key, pmb->port_key);
1696 return;
1697 }
1698
1699 /* Convert ethernet argument to string form for database. */
1700 char mac_string[ETH_ADDR_STRLEN + 1];
1701 snprintf(mac_string, sizeof mac_string,
1702 ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac));
1703
1704 /* Check for an update an existing IP-MAC binding for this logical
1705 * port.
1706 *
1707 * XXX This is not very efficient. */
1708 const struct sbrec_mac_binding *b;
1709 SBREC_MAC_BINDING_FOR_EACH (b, ctx->ovnsb_idl) {
1710 if (!strcmp(b->logical_port, pb->logical_port)
1711 && !strcmp(b->ip, pmb->ip_s)) {
1712 if (strcmp(b->mac, mac_string)) {
1713 sbrec_mac_binding_set_mac(b, mac_string);
1714 }
1715 return;
1716 }
1717 }
1718
1719 /* Add new IP-MAC binding for this logical port. */
1720 b = sbrec_mac_binding_insert(ctx->ovnsb_idl_txn);
1721 sbrec_mac_binding_set_logical_port(b, pb->logical_port);
1722 sbrec_mac_binding_set_ip(b, pmb->ip_s);
1723 sbrec_mac_binding_set_mac(b, mac_string);
1724 sbrec_mac_binding_set_datapath(b, pb->datapath);
1725 }
1726
1727 static void
1728 run_put_mac_bindings(struct controller_ctx *ctx)
1729 {
1730 if (!ctx->ovnsb_idl_txn) {
1731 return;
1732 }
1733
1734 const struct put_mac_binding *pmb;
1735 HMAP_FOR_EACH (pmb, hmap_node, &put_mac_bindings) {
1736 run_put_mac_binding(ctx, pmb);
1737 }
1738 flush_put_mac_bindings();
1739 }
1740
1741 static void
1742 wait_put_mac_bindings(struct controller_ctx *ctx)
1743 {
1744 if (ctx->ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
1745 poll_immediate_wake();
1746 }
1747 }
1748
1749 static void
1750 flush_put_mac_bindings(void)
1751 {
1752 struct put_mac_binding *pmb;
1753 HMAP_FOR_EACH_POP (pmb, hmap_node, &put_mac_bindings) {
1754 free(pmb);
1755 }
1756 }
1757 \f
1758 /*
1759 * Send gratuitous ARP for vif on localnet.
1760 *
1761 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
1762 * the port's mac,ip mapping. On localnet, such announcements are needed for
1763 * switches and routers on the broadcast segment to update their port-mac
1764 * and ARP tables.
1765 */
1766 struct garp_data {
1767 struct eth_addr ea; /* Ethernet address of port. */
1768 ovs_be32 ipv4; /* Ipv4 address of port. */
1769 long long int announce_time; /* Next announcement in ms. */
1770 int backoff; /* Backoff for the next announcement. */
1771 ofp_port_t ofport; /* ofport used to output this GARP. */
1772 int tag; /* VLAN tag of this GARP packet, or -1. */
1773 };
1774
1775 /* Contains GARPs to be sent. */
1776 static struct shash send_garp_data;
1777
1778 /* Next GARP announcement in ms. */
1779 static long long int send_garp_time;
1780
1781 static void
1782 init_send_garps(void)
1783 {
1784 shash_init(&send_garp_data);
1785 send_garp_time = LLONG_MAX;
1786 }
1787
1788 static void
1789 destroy_send_garps(void)
1790 {
1791 shash_destroy_free_data(&send_garp_data);
1792 }
1793
1794 static void
1795 add_garp(const char *name, ofp_port_t ofport, int tag,
1796 const struct eth_addr ea, ovs_be32 ip)
1797 {
1798 struct garp_data *garp = xmalloc(sizeof *garp);
1799 garp->ea = ea;
1800 garp->ipv4 = ip;
1801 garp->announce_time = time_msec() + 1000;
1802 garp->backoff = 1;
1803 garp->ofport = ofport;
1804 garp->tag = tag;
1805 shash_add(&send_garp_data, name, garp);
1806 }
1807
1808 /* Add or update a vif for which GARPs need to be announced. */
1809 static void
1810 send_garp_update(const struct sbrec_port_binding *binding_rec,
1811 struct simap *localnet_ofports, struct hmap *local_datapaths,
1812 struct shash *nat_addresses)
1813 {
1814 /* Find the localnet ofport to send this GARP. */
1815 struct local_datapath *ld
1816 = get_local_datapath(local_datapaths,
1817 binding_rec->datapath->tunnel_key);
1818 if (!ld || !ld->localnet_port) {
1819 return;
1820 }
1821 ofp_port_t ofport = u16_to_ofp(simap_get(localnet_ofports,
1822 ld->localnet_port->logical_port));
1823 int tag = ld->localnet_port->n_tag ? *ld->localnet_port->tag : -1;
1824
1825 volatile struct garp_data *garp = NULL;
1826 /* Update GARP for NAT IP if it exists. Consider port bindings with type
1827 * "l3gateway" for logical switch ports attached to gateway routers, and
1828 * port bindings with type "patch" for logical switch ports attached to
1829 * distributed gateway ports. */
1830 if (!strcmp(binding_rec->type, "l3gateway")
1831 || !strcmp(binding_rec->type, "patch")) {
1832 struct lport_addresses *laddrs = NULL;
1833 while ((laddrs = shash_find_and_delete(nat_addresses,
1834 binding_rec->logical_port))) {
1835 int i;
1836 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
1837 char *name = xasprintf("%s-%s", binding_rec->logical_port,
1838 laddrs->ipv4_addrs[i].addr_s);
1839 garp = shash_find_data(&send_garp_data, name);
1840 if (garp) {
1841 garp->ofport = ofport;
1842 garp->tag = tag;
1843 } else {
1844 add_garp(name, ofport, tag, laddrs->ea,
1845 laddrs->ipv4_addrs[i].addr);
1846 }
1847 free(name);
1848 }
1849 destroy_lport_addresses(laddrs);
1850 free(laddrs);
1851 }
1852 return;
1853 }
1854
1855 /* Update GARP for vif if it exists. */
1856 garp = shash_find_data(&send_garp_data, binding_rec->logical_port);
1857 if (garp) {
1858 garp->ofport = ofport;
1859 return;
1860 }
1861
1862 /* Add GARP for new vif. */
1863 int i;
1864 for (i = 0; i < binding_rec->n_mac; i++) {
1865 struct lport_addresses laddrs;
1866 if (!extract_lsp_addresses(binding_rec->mac[i], &laddrs)
1867 || !laddrs.n_ipv4_addrs) {
1868 continue;
1869 }
1870
1871 add_garp(binding_rec->logical_port, ofport, tag,
1872 laddrs.ea, laddrs.ipv4_addrs[0].addr);
1873
1874 destroy_lport_addresses(&laddrs);
1875 break;
1876 }
1877 }
1878
1879 /* Remove a vif from GARP announcements. */
1880 static void
1881 send_garp_delete(const char *lport)
1882 {
1883 struct garp_data *garp = shash_find_and_delete(&send_garp_data, lport);
1884 free(garp);
1885 }
1886
1887 static long long int
1888 send_garp(struct garp_data *garp, long long int current_time)
1889 {
1890 if (current_time < garp->announce_time) {
1891 return garp->announce_time;
1892 }
1893
1894 /* Compose a GARP request packet. */
1895 uint64_t packet_stub[128 / 8];
1896 struct dp_packet packet;
1897 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1898 compose_arp(&packet, ARP_OP_REQUEST, garp->ea, eth_addr_zero,
1899 true, garp->ipv4, garp->ipv4);
1900
1901 /* Compose a GARP request packet's vlan if exist. */
1902 if (garp->tag >= 0) {
1903 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN), htons(garp->tag));
1904 }
1905
1906 /* Compose actions. The garp request is output on localnet ofport. */
1907 uint64_t ofpacts_stub[4096 / 8];
1908 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1909 enum ofp_version version = rconn_get_version(swconn);
1910 ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
1911
1912 struct ofputil_packet_out po = {
1913 .packet = dp_packet_data(&packet),
1914 .packet_len = dp_packet_size(&packet),
1915 .buffer_id = UINT32_MAX,
1916 .ofpacts = ofpacts.data,
1917 .ofpacts_len = ofpacts.size,
1918 };
1919 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1920 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1921 queue_msg(ofputil_encode_packet_out(&po, proto));
1922 dp_packet_uninit(&packet);
1923 ofpbuf_uninit(&ofpacts);
1924
1925 /* Set the next announcement. At most 5 announcements are sent for a
1926 * vif. */
1927 if (garp->backoff < 16) {
1928 garp->backoff *= 2;
1929 garp->announce_time = current_time + garp->backoff * 1000;
1930 } else {
1931 garp->announce_time = LLONG_MAX;
1932 }
1933 return garp->announce_time;
1934 }
1935
1936 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
1937 static void
1938 get_localnet_vifs_l3gwports(struct controller_ctx *ctx,
1939 const struct ovsrec_bridge *br_int,
1940 const struct sbrec_chassis *chassis,
1941 struct hmap *local_datapaths,
1942 struct sset *localnet_vifs,
1943 struct simap *localnet_ofports,
1944 struct sset *local_l3gw_ports)
1945 {
1946 for (int i = 0; i < br_int->n_ports; i++) {
1947 const struct ovsrec_port *port_rec = br_int->ports[i];
1948 if (!strcmp(port_rec->name, br_int->name)) {
1949 continue;
1950 }
1951 const char *chassis_id = smap_get(&port_rec->external_ids,
1952 "ovn-chassis-id");
1953 if (chassis_id && !strcmp(chassis_id, chassis->name)) {
1954 continue;
1955 }
1956 const char *localnet = smap_get(&port_rec->external_ids,
1957 "ovn-localnet-port");
1958 for (int j = 0; j < port_rec->n_interfaces; j++) {
1959 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
1960 if (!iface_rec->n_ofport) {
1961 continue;
1962 }
1963 /* Get localnet port with its ofport. */
1964 if (localnet) {
1965 int64_t ofport = iface_rec->ofport[0];
1966 if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
1967 continue;
1968 }
1969 simap_put(localnet_ofports, localnet, ofport);
1970 continue;
1971 }
1972 /* Get localnet vif. */
1973 const char *iface_id = smap_get(&iface_rec->external_ids,
1974 "iface-id");
1975 if (!iface_id) {
1976 continue;
1977 }
1978 const struct sbrec_port_binding *pb
1979 = lport_lookup_by_name(ctx->ovnsb_idl, iface_id);
1980 if (!pb) {
1981 continue;
1982 }
1983 struct local_datapath *ld
1984 = get_local_datapath(local_datapaths,
1985 pb->datapath->tunnel_key);
1986 if (ld && ld->localnet_port) {
1987 sset_add(localnet_vifs, iface_id);
1988 }
1989 }
1990 }
1991
1992 const struct local_datapath *ld;
1993 struct ovsdb_idl_index_cursor cursor;
1994 struct sbrec_port_binding *lpval;
1995 lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
1996 &sbrec_table_port_binding);
1997 ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, &sbrec_table_port_binding,
1998 "lport-by-datapath", &cursor);
1999 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
2000 const struct sbrec_port_binding *pb;
2001
2002 if (!ld->localnet_port) {
2003 continue;
2004 }
2005
2006 /* Get l3gw ports. Consider port bindings with type "l3gateway"
2007 * that connect to gateway routers (if local), and consider port
2008 * bindings of type "patch" since they might connect to
2009 * distributed gateway ports with NAT addresses. */
2010
2011 sbrec_port_binding_index_set_datapath(lpval, ld->datapath);
2012
2013 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
2014 if ((ld->has_local_l3gateway && !strcmp(pb->type, "l3gateway"))
2015 || !strcmp(pb->type, "patch")) {
2016 sset_add(local_l3gw_ports, pb->logical_port);
2017 }
2018 }
2019 }
2020 sbrec_port_binding_index_destroy_row(lpval);
2021 }
2022
2023 static bool
2024 pinctrl_is_chassis_resident(struct controller_ctx *ctx,
2025 const struct sbrec_chassis *chassis,
2026 const struct chassis_index *chassis_index,
2027 struct sset *active_tunnels,
2028 const char *port_name)
2029 {
2030 const struct sbrec_port_binding *pb
2031 = lport_lookup_by_name(ctx->ovnsb_idl, port_name);
2032 if (!pb || !pb->chassis) {
2033 return false;
2034 }
2035 if (strcmp(pb->type, "chassisredirect")) {
2036 return pb->chassis == chassis;
2037 } else {
2038 struct ovs_list *gateway_chassis =
2039 gateway_chassis_get_ordered(pb, chassis_index);
2040 bool active = gateway_chassis_is_active(gateway_chassis,
2041 chassis,
2042 active_tunnels);
2043 gateway_chassis_destroy(gateway_chassis);
2044 return active;
2045 }
2046 }
2047
2048 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2049 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2050 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2051 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2052 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2053 *
2054 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2055 *
2056 * The caller must call destroy_lport_addresses() and free(*lport). */
2057 static bool
2058 extract_addresses_with_port(const char *addresses,
2059 struct lport_addresses *laddrs,
2060 char **lport)
2061 {
2062 int ofs;
2063 if (!extract_addresses(addresses, laddrs, &ofs)) {
2064 return false;
2065 } else if (ofs >= strlen(addresses)) {
2066 return true;
2067 }
2068
2069 struct lexer lexer;
2070 lexer_init(&lexer, addresses + ofs);
2071 lexer_get(&lexer);
2072
2073 if (lexer.error || lexer.token.type != LEX_T_ID
2074 || !lexer_match_id(&lexer, "is_chassis_resident")) {
2075 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2076 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
2077 lexer_destroy(&lexer);
2078 return true;
2079 }
2080
2081 if (!lexer_match(&lexer, LEX_T_LPAREN)) {
2082 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2083 VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
2084 "'is_chassis_resident' in address '%s'", addresses);
2085 lexer_destroy(&lexer);
2086 return false;
2087 }
2088
2089 if (lexer.token.type != LEX_T_STRING) {
2090 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2091 VLOG_INFO_RL(&rl,
2092 "Syntax error: expecting quoted string after"
2093 " 'is_chassis_resident' in address '%s'", addresses);
2094 lexer_destroy(&lexer);
2095 return false;
2096 }
2097
2098 *lport = xstrdup(lexer.token.s);
2099
2100 lexer_get(&lexer);
2101 if (!lexer_match(&lexer, LEX_T_RPAREN)) {
2102 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2103 VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted string in "
2104 "'is_chassis_resident()' in address '%s'",
2105 addresses);
2106 lexer_destroy(&lexer);
2107 return false;
2108 }
2109
2110 lexer_destroy(&lexer);
2111 return true;
2112 }
2113
2114 static void
2115 consider_nat_address(struct controller_ctx *ctx,
2116 const char *nat_address,
2117 const struct sbrec_port_binding *pb,
2118 struct sset *nat_address_keys,
2119 const struct sbrec_chassis *chassis,
2120 const struct chassis_index *chassis_index,
2121 struct sset *active_tunnels,
2122 struct shash *nat_addresses)
2123 {
2124 struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
2125 char *lport = NULL;
2126 if (!extract_addresses_with_port(nat_address, laddrs, &lport)
2127 || (!lport && !strcmp(pb->type, "patch"))
2128 || (lport && !pinctrl_is_chassis_resident(
2129 ctx, chassis, chassis_index, active_tunnels, lport))) {
2130 destroy_lport_addresses(laddrs);
2131 free(laddrs);
2132 free(lport);
2133 return;
2134 }
2135 free(lport);
2136
2137 int i;
2138 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
2139 char *name = xasprintf("%s-%s", pb->logical_port,
2140 laddrs->ipv4_addrs[i].addr_s);
2141 sset_add(nat_address_keys, name);
2142 free(name);
2143 }
2144 shash_add(nat_addresses, pb->logical_port, laddrs);
2145 }
2146
2147 static void
2148 get_nat_addresses_and_keys(struct controller_ctx *ctx,
2149 struct sset *nat_address_keys,
2150 struct sset *local_l3gw_ports,
2151 const struct sbrec_chassis *chassis,
2152 const struct chassis_index *chassis_index,
2153 struct sset *active_tunnels,
2154 struct shash *nat_addresses)
2155 {
2156 const char *gw_port;
2157 SSET_FOR_EACH(gw_port, local_l3gw_ports) {
2158 const struct sbrec_port_binding *pb;
2159
2160 pb = lport_lookup_by_name(ctx->ovnsb_idl, gw_port);
2161 if (!pb) {
2162 continue;
2163 }
2164
2165 if (pb->n_nat_addresses) {
2166 for (int i = 0; i < pb->n_nat_addresses; i++) {
2167 consider_nat_address(ctx, pb->nat_addresses[i], pb,
2168 nat_address_keys, chassis,
2169 chassis_index, active_tunnels,
2170 nat_addresses);
2171 }
2172 } else {
2173 /* Continue to support options:nat-addresses for version
2174 * upgrade. */
2175 const char *nat_addresses_options = smap_get(&pb->options,
2176 "nat-addresses");
2177 if (nat_addresses_options) {
2178 consider_nat_address(ctx, nat_addresses_options, pb,
2179 nat_address_keys, chassis,
2180 chassis_index, active_tunnels,
2181 nat_addresses);
2182 }
2183 }
2184 }
2185 }
2186
2187 static void
2188 send_garp_wait(void)
2189 {
2190 poll_timer_wait_until(send_garp_time);
2191 }
2192
2193 static void
2194 send_garp_run(struct controller_ctx *ctx,
2195 const struct ovsrec_bridge *br_int,
2196 const struct sbrec_chassis *chassis,
2197 const struct chassis_index *chassis_index,
2198 struct hmap *local_datapaths,
2199 struct sset *active_tunnels)
2200 {
2201 struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
2202 struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
2203 struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
2204 struct simap localnet_ofports = SIMAP_INITIALIZER(&localnet_ofports);
2205 struct shash nat_addresses;
2206
2207 shash_init(&nat_addresses);
2208
2209 get_localnet_vifs_l3gwports(ctx, br_int, chassis, local_datapaths,
2210 &localnet_vifs, &localnet_ofports, &local_l3gw_ports);
2211
2212 get_nat_addresses_and_keys(ctx, &nat_ip_keys, &local_l3gw_ports,
2213 chassis, chassis_index, active_tunnels,
2214 &nat_addresses);
2215 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2216 struct shash_node *iter, *next;
2217 SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
2218 if (!sset_contains(&localnet_vifs, iter->name) &&
2219 !sset_contains(&nat_ip_keys, iter->name)) {
2220 send_garp_delete(iter->name);
2221 }
2222 }
2223
2224 /* Update send_garp_data. */
2225 const char *iface_id;
2226 SSET_FOR_EACH (iface_id, &localnet_vifs) {
2227 const struct sbrec_port_binding *pb;
2228
2229 pb = lport_lookup_by_name(ctx->ovnsb_idl, iface_id);
2230 if (pb) {
2231 send_garp_update(pb, &localnet_ofports, local_datapaths,
2232 &nat_addresses);
2233 }
2234 }
2235
2236 /* Update send_garp_data for nat-addresses. */
2237 const char *gw_port;
2238 SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
2239 const struct sbrec_port_binding *pb;
2240
2241 pb = lport_lookup_by_name(ctx->ovnsb_idl, gw_port);
2242 if (pb) {
2243 send_garp_update(pb, &localnet_ofports, local_datapaths,
2244 &nat_addresses);
2245 }
2246 }
2247
2248 /* Send GARPs, and update the next announcement. */
2249 long long int current_time = time_msec();
2250 send_garp_time = LLONG_MAX;
2251 SHASH_FOR_EACH (iter, &send_garp_data) {
2252 long long int next_announce = send_garp(iter->data, current_time);
2253 if (send_garp_time > next_announce) {
2254 send_garp_time = next_announce;
2255 }
2256 }
2257 sset_destroy(&localnet_vifs);
2258 sset_destroy(&local_l3gw_ports);
2259 simap_destroy(&localnet_ofports);
2260
2261 SHASH_FOR_EACH_SAFE (iter, next, &nat_addresses) {
2262 struct lport_addresses *laddrs = iter->data;
2263 destroy_lport_addresses(laddrs);
2264 shash_delete(&nat_addresses, iter);
2265 free(laddrs);
2266 }
2267 shash_destroy(&nat_addresses);
2268
2269 sset_destroy(&nat_ip_keys);
2270 }
2271
2272 static void
2273 reload_metadata(struct ofpbuf *ofpacts, const struct match *md)
2274 {
2275 enum mf_field_id md_fields[] = {
2276 #if FLOW_N_REGS == 16
2277 MFF_REG0,
2278 MFF_REG1,
2279 MFF_REG2,
2280 MFF_REG3,
2281 MFF_REG4,
2282 MFF_REG5,
2283 MFF_REG6,
2284 MFF_REG7,
2285 MFF_REG8,
2286 MFF_REG9,
2287 MFF_REG10,
2288 MFF_REG11,
2289 MFF_REG12,
2290 MFF_REG13,
2291 MFF_REG14,
2292 MFF_REG15,
2293 #else
2294 #error
2295 #endif
2296 MFF_METADATA,
2297 };
2298 for (size_t i = 0; i < ARRAY_SIZE(md_fields); i++) {
2299 const struct mf_field *field = mf_from_id(md_fields[i]);
2300 if (!mf_is_all_wild(field, &md->wc)) {
2301 union mf_value value;
2302 mf_get_value(field, &md->flow, &value);
2303 ofpact_put_set_field(ofpacts, field, &value, NULL);
2304 }
2305 }
2306 }
2307
2308 static void
2309 pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md,
2310 struct ofpbuf *userdata)
2311 {
2312 /* This action only works for IPv6 ND packets, and the switch should only
2313 * send us ND packets this way, but check here just to be sure. */
2314 if (!is_nd(ip_flow, NULL)) {
2315 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2316 VLOG_WARN_RL(&rl, "NA action on non-ND packet");
2317 return;
2318 }
2319
2320 uint64_t packet_stub[128 / 8];
2321 struct dp_packet packet;
2322 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2323
2324 /* xxx These flags are not exactly correct. Look at section 7.2.4
2325 * xxx of RFC 4861. For example, we need to set ND_RSO_ROUTER for
2326 * xxx router's interfaces and ND_RSO_SOLICITED only if it was
2327 * xxx requested. */
2328 compose_nd_na(&packet, ip_flow->dl_dst, ip_flow->dl_src,
2329 &ip_flow->nd_target, &ip_flow->ipv6_src,
2330 htonl(ND_RSO_SOLICITED | ND_RSO_OVERRIDE));
2331
2332 /* Reload previous packet metadata and set actions from userdata. */
2333 set_actions_and_enqueue_msg(&packet, md, userdata);
2334 dp_packet_uninit(&packet);
2335 }
2336
2337 static void
2338 pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
2339 struct ofpbuf *userdata)
2340 {
2341 /* This action only works for IPv6 packets. */
2342 if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) {
2343 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2344 VLOG_WARN_RL(&rl, "NS action on non-IPv6 packet");
2345 return;
2346 }
2347
2348 uint64_t packet_stub[128 / 8];
2349 struct dp_packet packet;
2350 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2351
2352 compose_nd_ns(&packet, ip_flow->dl_src, &ip_flow->ipv6_src,
2353 &ip_flow->ipv6_dst);
2354
2355 /* Reload previous packet metadata and set actions from userdata. */
2356 set_actions_and_enqueue_msg(&packet, md, userdata);
2357 dp_packet_uninit(&packet);
2358 }
2359
2360 static void
2361 pinctrl_handle_put_nd_ra_opts(
2362 const struct flow *in_flow, struct dp_packet *pkt_in,
2363 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
2364 struct ofpbuf *continuation)
2365 {
2366 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2367 enum ofp_version version = rconn_get_version(swconn);
2368 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
2369 struct dp_packet *pkt_out_ptr = NULL;
2370 uint32_t success = 0;
2371
2372 /* Parse result field. */
2373 const struct mf_field *f;
2374 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
2375 if (ofperr) {
2376 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
2377 goto exit;
2378 }
2379
2380 /* Parse result offset. */
2381 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
2382 if (!ofsp) {
2383 VLOG_WARN_RL(&rl, "offset not present in the userdata");
2384 goto exit;
2385 }
2386
2387 /* Check that the result is valid and writable. */
2388 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
2389 ofperr = mf_check_dst(&dst, NULL);
2390 if (ofperr) {
2391 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
2392 goto exit;
2393 }
2394
2395 if (!userdata->size) {
2396 VLOG_WARN_RL(&rl, "IPv6 ND RA options not present in the userdata");
2397 goto exit;
2398 }
2399
2400 if (!is_icmpv6(in_flow, NULL) || in_flow->tp_dst != htons(0) ||
2401 in_flow->tp_src != htons(ND_ROUTER_SOLICIT)) {
2402 VLOG_WARN_RL(&rl, "put_nd_ra action on invalid or unsupported packet");
2403 goto exit;
2404 }
2405
2406 size_t new_packet_size = pkt_in->l4_ofs + userdata->size;
2407 struct dp_packet pkt_out;
2408 dp_packet_init(&pkt_out, new_packet_size);
2409 dp_packet_clear(&pkt_out);
2410 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
2411 pkt_out_ptr = &pkt_out;
2412
2413 /* Copy L2 and L3 headers from pkt_in. */
2414 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
2415 pkt_in->l4_ofs);
2416
2417 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
2418 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
2419 pkt_out.l3_ofs = pkt_in->l3_ofs;
2420 pkt_out.l4_ofs = pkt_in->l4_ofs;
2421
2422 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2423 dp_packet_put(&pkt_out, userdata->data, userdata->size);
2424
2425 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2426 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
2427 nh->ip6_plen = htons(userdata->size);
2428 struct ovs_ra_msg *ra = dp_packet_l4(&pkt_out);
2429 ra->icmph.icmp6_cksum = 0;
2430 uint32_t icmp_csum = packet_csum_pseudoheader6(nh);
2431 ra->icmph.icmp6_cksum = csum_finish(csum_continue(
2432 icmp_csum, ra, userdata->size));
2433 pin->packet = dp_packet_data(&pkt_out);
2434 pin->packet_len = dp_packet_size(&pkt_out);
2435 success = 1;
2436
2437 exit:
2438 if (!ofperr) {
2439 union mf_subvalue sv;
2440 sv.u8_val = success;
2441 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
2442 }
2443 queue_msg(ofputil_encode_resume(pin, continuation, proto));
2444 dp_packet_uninit(pkt_out_ptr);
2445 }