]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/controller/pinctrl.c
c816b2dd600209346bc896812f1ebef97b43578c
[mirror_ovs.git] / ovn / controller / pinctrl.c
1 /* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "pinctrl.h"
20
21 #include "coverage.h"
22 #include "csum.h"
23 #include "dirs.h"
24 #include "dp-packet.h"
25 #include "flow.h"
26 #include "gchassis.h"
27 #include "lport.h"
28 #include "nx-match.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
31 #include "lib/sset.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
39
40 #include "lib/dhcp.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
43 #include "ovn/lex.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
51 #include "timeval.h"
52 #include "vswitch-idl.h"
53 #include "lflow.h"
54
55 VLOG_DEFINE_THIS_MODULE(pinctrl);
56
57 /* OpenFlow connection to the switch. */
58 static struct rconn *swconn;
59
60 /* Last seen sequence number for 'swconn'. When this differs from
61 * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
62 static unsigned int conn_seq_no;
63
64 static void pinctrl_handle_put_mac_binding(const struct flow *md,
65 const struct flow *headers,
66 bool is_arp);
67 static void init_put_mac_bindings(void);
68 static void destroy_put_mac_bindings(void);
69 static void run_put_mac_bindings(struct controller_ctx *);
70 static void wait_put_mac_bindings(struct controller_ctx *);
71 static void flush_put_mac_bindings(void);
72
73 static void init_send_garps(void);
74 static void destroy_send_garps(void);
75 static void send_garp_wait(void);
76 static void send_garp_run(struct controller_ctx *ctx,
77 const struct ovsrec_bridge *,
78 const struct sbrec_chassis *,
79 const struct chassis_index *chassis_index,
80 struct hmap *local_datapaths,
81 struct sset *active_tunnels);
82 static void pinctrl_handle_nd_na(const struct flow *ip_flow,
83 const struct match *md,
84 struct ofpbuf *userdata);
85 static void reload_metadata(struct ofpbuf *ofpacts,
86 const struct match *md);
87 static void pinctrl_handle_put_nd_ra_opts(
88 const struct flow *ip_flow, struct dp_packet *pkt_in,
89 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
90 struct ofpbuf *continuation);
91 static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
92 const struct match *md,
93 struct ofpbuf *userdata);
94 static void init_ipv6_ras(void);
95 static void destroy_ipv6_ras(void);
96 static void ipv6_ra_wait(void);
97 static void send_ipv6_ras(const struct controller_ctx *,
98 struct hmap *local_datapaths);
99
100 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
101
102 void
103 pinctrl_init(void)
104 {
105 swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
106 conn_seq_no = 0;
107 init_put_mac_bindings();
108 init_send_garps();
109 init_ipv6_ras();
110 }
111
112 static ovs_be32
113 queue_msg(struct ofpbuf *msg)
114 {
115 const struct ofp_header *oh = msg->data;
116 ovs_be32 xid = oh->xid;
117
118 rconn_send(swconn, msg, NULL);
119 return xid;
120 }
121
122 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
123 static void
124 pinctrl_setup(void)
125 {
126 /* Fetch the switch configuration. The response later will allow us to
127 * change the miss_send_len to UINT16_MAX, so that we can enable
128 * asynchronous messages. */
129 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
130 rconn_get_version(swconn), 0));
131
132 /* Set a packet-in format that supports userdata. */
133 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
134 OFPUTIL_PACKET_IN_NXT2));
135 }
136
137 static void
138 set_switch_config(struct rconn *swconn_,
139 const struct ofputil_switch_config *config)
140 {
141 enum ofp_version version = rconn_get_version(swconn_);
142 struct ofpbuf *request = ofputil_encode_set_config(config, version);
143 queue_msg(request);
144 }
145
146 static void
147 set_actions_and_enqueue_msg(const struct dp_packet *packet,
148 const struct match *md,
149 struct ofpbuf *userdata)
150 {
151 /* Copy metadata from 'md' into the packet-out via "set_field"
152 * actions, then add actions from 'userdata'.
153 */
154 uint64_t ofpacts_stub[4096 / 8];
155 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
156 enum ofp_version version = rconn_get_version(swconn);
157
158 reload_metadata(&ofpacts, md);
159 enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
160 version, NULL, NULL,
161 &ofpacts);
162 if (error) {
163 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
164 VLOG_WARN_RL(&rl, "failed to parse actions from userdata (%s)",
165 ofperr_to_string(error));
166 ofpbuf_uninit(&ofpacts);
167 return;
168 }
169
170 struct ofputil_packet_out po = {
171 .packet = dp_packet_data(packet),
172 .packet_len = dp_packet_size(packet),
173 .buffer_id = UINT32_MAX,
174 .ofpacts = ofpacts.data,
175 .ofpacts_len = ofpacts.size,
176 };
177 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
178 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
179 queue_msg(ofputil_encode_packet_out(&po, proto));
180 ofpbuf_uninit(&ofpacts);
181 }
182
183 static void
184 pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
185 struct ofpbuf *userdata)
186 {
187 /* This action only works for IP packets, and the switch should only send
188 * us IP packets this way, but check here just to be sure. */
189 if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
190 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
191 VLOG_WARN_RL(&rl, "ARP action on non-IP packet (Ethertype %"PRIx16")",
192 ntohs(ip_flow->dl_type));
193 return;
194 }
195
196 /* Compose an ARP packet. */
197 uint64_t packet_stub[128 / 8];
198 struct dp_packet packet;
199 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
200 compose_arp__(&packet);
201
202 struct eth_header *eth = dp_packet_eth(&packet);
203 eth->eth_dst = ip_flow->dl_dst;
204 eth->eth_src = ip_flow->dl_src;
205
206 struct arp_eth_header *arp = dp_packet_l3(&packet);
207 arp->ar_op = htons(ARP_OP_REQUEST);
208 arp->ar_sha = ip_flow->dl_src;
209 put_16aligned_be32(&arp->ar_spa, ip_flow->nw_src);
210 arp->ar_tha = eth_addr_zero;
211 put_16aligned_be32(&arp->ar_tpa, ip_flow->nw_dst);
212
213 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
214 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
215 ip_flow->vlans[0].tci);
216 }
217
218 set_actions_and_enqueue_msg(&packet, md, userdata);
219 dp_packet_uninit(&packet);
220 }
221
222 static void
223 pinctrl_handle_icmp4(const struct flow *ip_flow, const struct match *md,
224 struct ofpbuf *userdata)
225 {
226 /* This action only works for IP packets, and the switch should only send
227 * us IP packets this way, but check here just to be sure. */
228 if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
229 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
230 VLOG_WARN_RL(&rl,
231 "ICMP4 action on non-IP packet (eth_type 0x%"PRIx16")",
232 ntohs(ip_flow->dl_type));
233 return;
234 }
235
236 uint64_t packet_stub[128 / 8];
237 struct dp_packet packet;
238
239 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
240 dp_packet_clear(&packet);
241 packet.packet_type = htonl(PT_ETH);
242
243 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
244 eh->eth_dst = ip_flow->dl_dst;
245 eh->eth_src = ip_flow->dl_src;
246 eh->eth_type = htons(ETH_TYPE_IP);
247
248 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
249 dp_packet_set_l3(&packet, nh);
250 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
251 nh->ip_tot_len = htons(sizeof(struct ip_header) +
252 sizeof(struct icmp_header));
253 nh->ip_proto = IPPROTO_ICMP;
254 nh->ip_frag_off = htons(IP_DF);
255 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
256 ip_flow->nw_tos, 255);
257
258 struct icmp_header *ih = dp_packet_put_zeros(&packet, sizeof *ih);
259 dp_packet_set_l4(&packet, ih);
260 packet_set_icmp(&packet, ICMP4_DST_UNREACH, 1);
261
262 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
263 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
264 ip_flow->vlans[0].tci);
265 }
266
267 set_actions_and_enqueue_msg(&packet, md, userdata);
268 dp_packet_uninit(&packet);
269 }
270
271 static void
272 pinctrl_handle_tcp_reset(const struct flow *ip_flow, struct dp_packet *pkt_in,
273 const struct match *md, struct ofpbuf *userdata)
274 {
275 /* This action only works for TCP segments, and the switch should only send
276 * us TCP segments this way, but check here just to be sure. */
277 if (ip_flow->nw_proto != IPPROTO_TCP) {
278 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
279 VLOG_WARN_RL(&rl, "TCP_RESET action on non-TCP packet");
280 return;
281 }
282
283 uint64_t packet_stub[128 / 8];
284 struct dp_packet packet;
285
286 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
287 dp_packet_clear(&packet);
288 packet.packet_type = htonl(PT_ETH);
289
290 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
291 eh->eth_dst = ip_flow->dl_dst;
292 eh->eth_src = ip_flow->dl_src;
293
294 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IPV6)) {
295 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
296
297 eh->eth_type = htons(ETH_TYPE_IPV6);
298 dp_packet_set_l3(&packet, nh);
299 nh->ip6_vfc = 0x60;
300 nh->ip6_nxt = IPPROTO_TCP;
301 nh->ip6_plen = htons(TCP_HEADER_LEN);
302 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
303 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
304 } else {
305 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
306
307 eh->eth_type = htons(ETH_TYPE_IP);
308 dp_packet_set_l3(&packet, nh);
309 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
310 nh->ip_tot_len = htons(IP_HEADER_LEN + TCP_HEADER_LEN);
311 nh->ip_proto = IPPROTO_TCP;
312 nh->ip_frag_off = htons(IP_DF);
313 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
314 ip_flow->nw_tos, 255);
315 }
316
317 struct tcp_header *th = dp_packet_put_zeros(&packet, sizeof *th);
318 struct tcp_header *tcp_in = dp_packet_l4(pkt_in);
319 dp_packet_set_l4(&packet, th);
320 th->tcp_ctl = TCP_CTL(TCP_RST, 5);
321 if (ip_flow->tcp_flags & htons(TCP_ACK)) {
322 th->tcp_seq = tcp_in->tcp_ack;
323 } else {
324 uint32_t tcp_seq, ack_seq, tcp_len;
325
326 tcp_seq = ntohl(get_16aligned_be32(&tcp_in->tcp_seq));
327 tcp_len = TCP_OFFSET(tcp_in->tcp_ctl) * 4;
328 ack_seq = tcp_seq + dp_packet_l4_size(pkt_in) - tcp_len;
329 put_16aligned_be32(&th->tcp_ack, htonl(ack_seq));
330 put_16aligned_be32(&th->tcp_seq, 0);
331 }
332 packet_set_tcp_port(&packet, ip_flow->tp_dst, ip_flow->tp_src);
333
334 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
335 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
336 ip_flow->vlans[0].tci);
337 }
338
339 set_actions_and_enqueue_msg(&packet, md, userdata);
340 dp_packet_uninit(&packet);
341 }
342
343 static void
344 pinctrl_handle_put_dhcp_opts(
345 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
346 struct ofpbuf *userdata, struct ofpbuf *continuation)
347 {
348 enum ofp_version version = rconn_get_version(swconn);
349 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
350 struct dp_packet *pkt_out_ptr = NULL;
351 uint32_t success = 0;
352
353 /* Parse result field. */
354 const struct mf_field *f;
355 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
356 if (ofperr) {
357 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
358 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
359 goto exit;
360 }
361
362 /* Parse result offset and offer IP. */
363 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
364 ovs_be32 *offer_ip = ofpbuf_try_pull(userdata, sizeof *offer_ip);
365 if (!ofsp || !offer_ip) {
366 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
367 VLOG_WARN_RL(&rl, "offset or offer_ip not present in the userdata");
368 goto exit;
369 }
370
371 /* Check that the result is valid and writable. */
372 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
373 ofperr = mf_check_dst(&dst, NULL);
374 if (ofperr) {
375 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
376 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
377 goto exit;
378 }
379
380 if (!userdata->size) {
381 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
382 VLOG_WARN_RL(&rl, "DHCP options not present in the userdata");
383 goto exit;
384 }
385
386 /* Validate the DHCP request packet.
387 * Format of the DHCP packet is
388 * ------------------------------------------------------------------------
389 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
390 * ------------------------------------------------------------------------
391 */
392 if (dp_packet_l4_size(pkt_in) < (UDP_HEADER_LEN +
393 sizeof (struct dhcp_header) + sizeof(uint32_t) + 3)) {
394 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
395 VLOG_WARN_RL(&rl, "Invalid or incomplete DHCP packet recieved");
396 goto exit;
397 }
398
399 struct dhcp_header const *in_dhcp_data = dp_packet_get_udp_payload(pkt_in);
400 if (in_dhcp_data->op != DHCP_OP_REQUEST) {
401 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
402 VLOG_WARN_RL(&rl, "Invalid opcode in the DHCP packet : %d",
403 in_dhcp_data->op);
404 goto exit;
405 }
406
407 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
408 * options is the DHCP magic cookie followed by the actual DHCP options.
409 */
410 const uint8_t *in_dhcp_opt =
411 (const uint8_t *)dp_packet_get_udp_payload(pkt_in) +
412 sizeof (struct dhcp_header);
413
414 ovs_be32 magic_cookie = htonl(DHCP_MAGIC_COOKIE);
415 if (memcmp(in_dhcp_opt, &magic_cookie, sizeof(ovs_be32))) {
416 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
417 VLOG_WARN_RL(&rl, "DHCP magic cookie not present in the DHCP packet");
418 goto exit;
419 }
420
421 in_dhcp_opt += 4;
422 /* Check that the DHCP Message Type (opt 53) is present or not with
423 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST as the first
424 * DHCP option.
425 */
426 if (!(in_dhcp_opt[0] == DHCP_OPT_MSG_TYPE && in_dhcp_opt[1] == 1 && (
427 in_dhcp_opt[2] == DHCP_MSG_DISCOVER ||
428 in_dhcp_opt[2] == DHCP_MSG_REQUEST))) {
429 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
430 VLOG_WARN_RL(&rl, "Invalid DHCP message type : opt code = %d,"
431 " opt value = %d", in_dhcp_opt[0], in_dhcp_opt[2]);
432 goto exit;
433 }
434
435 uint8_t msg_type;
436 if (in_dhcp_opt[2] == DHCP_MSG_DISCOVER) {
437 msg_type = DHCP_MSG_OFFER;
438 } else {
439 msg_type = DHCP_MSG_ACK;
440 }
441
442 /* Frame the DHCP reply packet
443 * Total DHCP options length will be options stored in the userdata +
444 * 16 bytes.
445 *
446 * --------------------------------------------------------------
447 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
448 * --------------------------------------------------------------
449 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
450 * --------------------------------------------------------------
451 */
452 uint16_t new_l4_size = UDP_HEADER_LEN + DHCP_HEADER_LEN + \
453 userdata->size + 16;
454 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
455
456 struct dp_packet pkt_out;
457 dp_packet_init(&pkt_out, new_packet_size);
458 dp_packet_clear(&pkt_out);
459 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
460 pkt_out_ptr = &pkt_out;
461
462 /* Copy the L2 and L3 headers from the pkt_in as they would remain same*/
463 dp_packet_put(
464 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
465
466 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
467 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
468 pkt_out.l3_ofs = pkt_in->l3_ofs;
469 pkt_out.l4_ofs = pkt_in->l4_ofs;
470
471 struct udp_header *udp = dp_packet_put(
472 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
473
474 struct dhcp_header *dhcp_data = dp_packet_put(
475 &pkt_out, dp_packet_pull(pkt_in, DHCP_HEADER_LEN), DHCP_HEADER_LEN);
476 dhcp_data->op = DHCP_OP_REPLY;
477 dhcp_data->yiaddr = *offer_ip;
478 dp_packet_put(&pkt_out, &magic_cookie, sizeof(ovs_be32));
479
480 uint8_t *out_dhcp_opts = dp_packet_put_zeros(&pkt_out,
481 userdata->size + 12);
482 /* DHCP option - type */
483 out_dhcp_opts[0] = DHCP_OPT_MSG_TYPE;
484 out_dhcp_opts[1] = 1;
485 out_dhcp_opts[2] = msg_type;
486 out_dhcp_opts += 3;
487
488 memcpy(out_dhcp_opts, userdata->data, userdata->size);
489 out_dhcp_opts += userdata->size;
490 /* Padding */
491 out_dhcp_opts += 4;
492 /* End */
493 out_dhcp_opts[0] = DHCP_OPT_END;
494
495 udp->udp_len = htons(new_l4_size);
496
497 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
498 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs + new_l4_size);
499 udp->udp_csum = 0;
500 /* Checksum needs to be initialized to zero. */
501 out_ip->ip_csum = 0;
502 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
503
504 pin->packet = dp_packet_data(&pkt_out);
505 pin->packet_len = dp_packet_size(&pkt_out);
506
507 /* Log the response. */
508 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 40);
509 const struct eth_header *l2 = dp_packet_eth(&pkt_out);
510 VLOG_INFO_RL(&rl, "DHCP%s "ETH_ADDR_FMT" "IP_FMT"",
511 msg_type == DHCP_MSG_OFFER ? "OFFER" : "ACK",
512 ETH_ADDR_ARGS(l2->eth_src), IP_ARGS(*offer_ip));
513
514 success = 1;
515 exit:
516 if (!ofperr) {
517 union mf_subvalue sv;
518 sv.u8_val = success;
519 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
520 }
521 queue_msg(ofputil_encode_resume(pin, continuation, proto));
522 if (pkt_out_ptr) {
523 dp_packet_uninit(pkt_out_ptr);
524 }
525 }
526
527 static bool
528 compose_out_dhcpv6_opts(struct ofpbuf *userdata,
529 struct ofpbuf *out_dhcpv6_opts, ovs_be32 iaid)
530 {
531 while (userdata->size) {
532 struct dhcp_opt6_header *userdata_opt = ofpbuf_try_pull(
533 userdata, sizeof *userdata_opt);
534 if (!userdata_opt) {
535 return false;
536 }
537
538 size_t size = ntohs(userdata_opt->size);
539 uint8_t *userdata_opt_data = ofpbuf_try_pull(userdata, size);
540 if (!userdata_opt_data) {
541 return false;
542 }
543
544 switch (ntohs(userdata_opt->opt_code)) {
545 case DHCPV6_OPT_SERVER_ID_CODE:
546 {
547 /* The Server Identifier option carries a DUID
548 * identifying a server between a client and a server.
549 * See RFC 3315 Sec 9 and Sec 22.3.
550 *
551 * We use DUID Based on Link-layer Address [DUID-LL].
552 */
553
554 struct dhcpv6_opt_server_id *opt_server_id = ofpbuf_put_zeros(
555 out_dhcpv6_opts, sizeof *opt_server_id);
556
557 opt_server_id->opt.code = htons(DHCPV6_OPT_SERVER_ID_CODE);
558 opt_server_id->opt.len = htons(size + 4);
559 opt_server_id->duid_type = htons(DHCPV6_DUID_LL);
560 opt_server_id->hw_type = htons(DHCPV6_HW_TYPE_ETH);
561 memcpy(&opt_server_id->mac, userdata_opt_data,
562 sizeof(struct eth_addr));
563 break;
564 }
565
566 case DHCPV6_OPT_IA_ADDR_CODE:
567 {
568 if (size != sizeof(struct in6_addr)) {
569 return false;
570 }
571
572 /* IA Address option is used to specify IPv6 addresses associated
573 * with an IA_NA or IA_TA. The IA Address option must be
574 * encapsulated in the Options field of an IA_NA or IA_TA option.
575 *
576 * We will encapsulate the IA Address within the IA_NA option.
577 * Please see RFC 3315 section 22.5 and 22.6
578 */
579 struct dhcpv6_opt_ia_na *opt_ia_na = ofpbuf_put_zeros(
580 out_dhcpv6_opts, sizeof *opt_ia_na);
581 opt_ia_na->opt.code = htons(DHCPV6_OPT_IA_NA_CODE);
582 /* IA_NA length (in bytes)-
583 * IAID - 4
584 * T1 - 4
585 * T2 - 4
586 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
587 */
588 opt_ia_na->opt.len = htons(12 + sizeof(struct dhcpv6_opt_ia_addr));
589 opt_ia_na->iaid = iaid;
590 /* Set the lifetime of the address(es) to infinity */
591 opt_ia_na->t1 = OVS_BE32_MAX;
592 opt_ia_na->t2 = OVS_BE32_MAX;
593
594 struct dhcpv6_opt_ia_addr *opt_ia_addr = ofpbuf_put_zeros(
595 out_dhcpv6_opts, sizeof *opt_ia_addr);
596 opt_ia_addr->opt.code = htons(DHCPV6_OPT_IA_ADDR_CODE);
597 opt_ia_addr->opt.len = htons(size + 8);
598 memcpy(opt_ia_addr->ipv6.s6_addr, userdata_opt_data, size);
599 opt_ia_addr->t1 = OVS_BE32_MAX;
600 opt_ia_addr->t2 = OVS_BE32_MAX;
601 break;
602 }
603
604 case DHCPV6_OPT_DNS_SERVER_CODE:
605 {
606 struct dhcpv6_opt_header *opt_dns = ofpbuf_put_zeros(
607 out_dhcpv6_opts, sizeof *opt_dns);
608 opt_dns->code = htons(DHCPV6_OPT_DNS_SERVER_CODE);
609 opt_dns->len = htons(size);
610 ofpbuf_put(out_dhcpv6_opts, userdata_opt_data, size);
611 break;
612 }
613
614 case DHCPV6_OPT_DOMAIN_SEARCH_CODE:
615 {
616 struct dhcpv6_opt_header *opt_dsl = ofpbuf_put_zeros(
617 out_dhcpv6_opts, sizeof *opt_dsl);
618 opt_dsl->code = htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE);
619 opt_dsl->len = htons(size + 2);
620 uint8_t *data = ofpbuf_put_zeros(out_dhcpv6_opts, size + 2);
621 *data = size;
622 memcpy(data + 1, userdata_opt_data, size);
623 break;
624 }
625
626 default:
627 return false;
628 }
629 }
630 return true;
631 }
632
633 static void
634 pinctrl_handle_put_dhcpv6_opts(
635 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
636 struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
637 {
638 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
639 enum ofp_version version = rconn_get_version(swconn);
640 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
641 struct dp_packet *pkt_out_ptr = NULL;
642 uint32_t success = 0;
643
644 /* Parse result field. */
645 const struct mf_field *f;
646 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
647 if (ofperr) {
648 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
649 goto exit;
650 }
651
652 /* Parse result offset. */
653 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
654 if (!ofsp) {
655 VLOG_WARN_RL(&rl, "offset not present in the userdata");
656 goto exit;
657 }
658
659 /* Check that the result is valid and writable. */
660 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
661 ofperr = mf_check_dst(&dst, NULL);
662 if (ofperr) {
663 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
664 goto exit;
665 }
666
667 if (!userdata->size) {
668 VLOG_WARN_RL(&rl, "DHCPv6 options not present in the userdata");
669 goto exit;
670 }
671
672 struct udp_header *in_udp = dp_packet_l4(pkt_in);
673 const uint8_t *in_dhcpv6_data = dp_packet_get_udp_payload(pkt_in);
674 if (!in_udp || !in_dhcpv6_data) {
675 VLOG_WARN_RL(&rl, "truncated dhcpv6 packet");
676 goto exit;
677 }
678
679 uint8_t out_dhcpv6_msg_type;
680 switch(*in_dhcpv6_data) {
681 case DHCPV6_MSG_TYPE_SOLICIT:
682 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_ADVT;
683 break;
684
685 case DHCPV6_MSG_TYPE_REQUEST:
686 case DHCPV6_MSG_TYPE_CONFIRM:
687 case DHCPV6_MSG_TYPE_DECLINE:
688 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_REPLY;
689 break;
690
691 default:
692 /* Invalid or unsupported DHCPv6 message type */
693 goto exit;
694 }
695
696 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes). */
697 in_dhcpv6_data += 4;
698 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
699 * solicit/request/confirm packet and copy the same IAID in the Server's
700 * response. */
701 ovs_be32 iaid = 0;
702 struct dhcpv6_opt_header const *in_opt_client_id = NULL;
703 size_t udp_len = ntohs(in_udp->udp_len);
704 size_t l4_len = dp_packet_l4_size(pkt_in);
705 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
706 while (in_dhcpv6_data < end) {
707 struct dhcpv6_opt_header const *in_opt =
708 (struct dhcpv6_opt_header *)in_dhcpv6_data;
709 switch(ntohs(in_opt->code)) {
710 case DHCPV6_OPT_IA_NA_CODE:
711 {
712 struct dhcpv6_opt_ia_na *opt_ia_na = (
713 struct dhcpv6_opt_ia_na *)in_opt;
714 iaid = opt_ia_na->iaid;
715 break;
716 }
717
718 case DHCPV6_OPT_CLIENT_ID_CODE:
719 in_opt_client_id = in_opt;
720 break;
721
722 default:
723 break;
724 }
725 in_dhcpv6_data += sizeof *in_opt + ntohs(in_opt->len);
726 }
727
728 if (!in_opt_client_id) {
729 VLOG_WARN_RL(&rl, "DHCPv6 option - Client id not present in the "
730 " DHCPv6 packet");
731 goto exit;
732 }
733
734 if (!iaid) {
735 VLOG_WARN_RL(&rl, "DHCPv6 option - IA NA not present in the "
736 " DHCPv6 packet");
737 goto exit;
738 }
739
740 uint64_t out_ofpacts_dhcpv6_opts_stub[256 / 8];
741 struct ofpbuf out_dhcpv6_opts =
742 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub);
743
744 if (!compose_out_dhcpv6_opts(userdata, &out_dhcpv6_opts, iaid)) {
745 VLOG_WARN_RL(&rl, "Invalid userdata");
746 goto exit;
747 }
748
749 uint16_t new_l4_size
750 = (UDP_HEADER_LEN + 4 + sizeof *in_opt_client_id +
751 ntohs(in_opt_client_id->len) + out_dhcpv6_opts.size);
752 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
753
754 struct dp_packet pkt_out;
755 dp_packet_init(&pkt_out, new_packet_size);
756 dp_packet_clear(&pkt_out);
757 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
758 pkt_out_ptr = &pkt_out;
759
760 /* Copy L2 and L3 headers from pkt_in. */
761 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
762 pkt_in->l4_ofs);
763
764 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
765 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
766 pkt_out.l3_ofs = pkt_in->l3_ofs;
767 pkt_out.l4_ofs = pkt_in->l4_ofs;
768
769 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
770 * Need to preserve the transaction id in the DHCPv6 reply packet. */
771 struct udp_header *out_udp = dp_packet_put(
772 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
773 uint8_t *out_dhcpv6 = dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, 4), 4);
774
775 /* Set the proper DHCPv6 message type. */
776 *out_dhcpv6 = out_dhcpv6_msg_type;
777
778 /* Copy the Client Identifier. */
779 dp_packet_put(&pkt_out, in_opt_client_id,
780 sizeof *in_opt_client_id + ntohs(in_opt_client_id->len));
781
782 /* Copy the DHCPv6 Options. */
783 dp_packet_put(&pkt_out, out_dhcpv6_opts.data, out_dhcpv6_opts.size);
784 out_udp->udp_len = htons(new_l4_size);
785 out_udp->udp_csum = 0;
786
787 struct ovs_16aligned_ip6_hdr *out_ip6 = dp_packet_l3(&pkt_out);
788 out_ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = out_udp->udp_len;
789
790 uint32_t csum;
791 csum = packet_csum_pseudoheader6(dp_packet_l3(&pkt_out));
792 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
793 ((const unsigned char *)out_udp -
794 (const unsigned char *)dp_packet_eth(&pkt_out)));
795 out_udp->udp_csum = csum_finish(csum);
796 if (!out_udp->udp_csum) {
797 out_udp->udp_csum = htons(0xffff);
798 }
799
800 pin->packet = dp_packet_data(&pkt_out);
801 pin->packet_len = dp_packet_size(&pkt_out);
802 ofpbuf_uninit(&out_dhcpv6_opts);
803 success = 1;
804 exit:
805 if (!ofperr) {
806 union mf_subvalue sv;
807 sv.u8_val = success;
808 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
809 }
810 queue_msg(ofputil_encode_resume(pin, continuation, proto));
811 dp_packet_uninit(pkt_out_ptr);
812 }
813
814 static void
815 put_be16(struct ofpbuf *buf, ovs_be16 x)
816 {
817 ofpbuf_put(buf, &x, sizeof x);
818 }
819
820 static void
821 put_be32(struct ofpbuf *buf, ovs_be32 x)
822 {
823 ofpbuf_put(buf, &x, sizeof x);
824 }
825
826 static void
827 pinctrl_handle_dns_lookup(
828 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
829 struct ofpbuf *userdata, struct ofpbuf *continuation,
830 struct controller_ctx *ctx)
831 {
832 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
833 enum ofp_version version = rconn_get_version(swconn);
834 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
835 struct dp_packet *pkt_out_ptr = NULL;
836 uint32_t success = 0;
837
838 /* Parse result field. */
839 const struct mf_field *f;
840 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
841 if (ofperr) {
842 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
843 goto exit;
844 }
845
846 /* Parse result offset. */
847 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
848 if (!ofsp) {
849 VLOG_WARN_RL(&rl, "offset not present in the userdata");
850 goto exit;
851 }
852
853 /* Check that the result is valid and writable. */
854 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
855 ofperr = mf_check_dst(&dst, NULL);
856 if (ofperr) {
857 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
858 goto exit;
859 }
860
861 /* Extract the DNS header */
862 struct dns_header const *in_dns_header = dp_packet_get_udp_payload(pkt_in);
863 if (!in_dns_header) {
864 VLOG_WARN_RL(&rl, "truncated dns packet");
865 goto exit;
866 }
867
868 /* Check if it is DNS request or not */
869 if (in_dns_header->lo_flag & 0x80) {
870 /* It's a DNS response packet which we are not interested in */
871 goto exit;
872 }
873
874 /* Check if at least one query request is present */
875 if (!in_dns_header->qdcount) {
876 goto exit;
877 }
878
879 struct udp_header *in_udp = dp_packet_l4(pkt_in);
880 size_t udp_len = ntohs(in_udp->udp_len);
881 size_t l4_len = dp_packet_l4_size(pkt_in);
882 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
883 uint8_t *in_dns_data = (uint8_t *)(in_dns_header + 1);
884 uint8_t *in_queryname = in_dns_data;
885 uint8_t idx = 0;
886 struct ds query_name;
887 ds_init(&query_name);
888 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
889 * encoded as (in hex) - 03 77 77 77 03 6f 76 63 03 6f 72 67 00.
890 */
891 while ((in_dns_data + idx) < end && in_dns_data[idx]) {
892 uint8_t label_len = in_dns_data[idx++];
893 if (in_dns_data + idx + label_len > end) {
894 ds_destroy(&query_name);
895 goto exit;
896 }
897 ds_put_buffer(&query_name, (const char *) in_dns_data + idx, label_len);
898 idx += label_len;
899 ds_put_char(&query_name, '.');
900 }
901
902 idx++;
903 ds_chomp(&query_name, '.');
904 in_dns_data += idx;
905
906 /* Query should have TYPE and CLASS fields */
907 if (in_dns_data + (2 * sizeof(ovs_be16)) > end) {
908 ds_destroy(&query_name);
909 goto exit;
910 }
911
912 uint16_t query_type = ntohs(*ALIGNED_CAST(const ovs_be16 *, in_dns_data));
913 /* Supported query types - A, AAAA and ANY */
914 if (!(query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_AAAA
915 || query_type == DNS_QUERY_TYPE_ANY)) {
916 ds_destroy(&query_name);
917 goto exit;
918 }
919
920 uint64_t dp_key = ntohll(pin->flow_metadata.flow.metadata);
921 const struct sbrec_dns *sbrec_dns;
922 const char *answer_ips = NULL;
923 SBREC_DNS_FOR_EACH(sbrec_dns, ctx->ovnsb_idl) {
924 for (size_t i = 0; i < sbrec_dns->n_datapaths; i++) {
925 if (sbrec_dns->datapaths[i]->tunnel_key == dp_key) {
926 answer_ips = smap_get(&sbrec_dns->records,
927 ds_cstr(&query_name));
928 if (answer_ips) {
929 break;
930 }
931 }
932 }
933
934 if (answer_ips) {
935 break;
936 }
937 }
938
939 ds_destroy(&query_name);
940 if (!answer_ips) {
941 goto exit;
942 }
943
944 struct lport_addresses ip_addrs;
945 if (!extract_ip_addresses(answer_ips, &ip_addrs)) {
946 goto exit;
947 }
948
949 uint16_t ancount = 0;
950 uint64_t dns_ans_stub[128 / 8];
951 struct ofpbuf dns_answer = OFPBUF_STUB_INITIALIZER(dns_ans_stub);
952
953 if (query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_ANY) {
954 for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
955 /* Copy the answer section */
956 /* Format of the answer section is
957 * - NAME -> The domain name
958 * - TYPE -> 2 octets containing one of the RR type codes
959 * - CLASS -> 2 octets which specify the class of the data
960 * in the RDATA field.
961 * - TTL -> 32 bit unsigned int specifying the time
962 * interval (in secs) that the resource record
963 * may be cached before it should be discarded.
964 * - RDLENGTH -> 16 bit integer specifying the length of the
965 * RDATA field.
966 * - RDATA -> a variable length string of octets that
967 * describes the resource. In our case it will
968 * be IP address of the domain name.
969 */
970 ofpbuf_put(&dns_answer, in_queryname, idx);
971 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_A));
972 put_be16(&dns_answer, htons(DNS_CLASS_IN));
973 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
974 put_be16(&dns_answer, htons(sizeof(ovs_be32)));
975 put_be32(&dns_answer, ip_addrs.ipv4_addrs[i].addr);
976 ancount++;
977 }
978 }
979
980 if (query_type == DNS_QUERY_TYPE_AAAA ||
981 query_type == DNS_QUERY_TYPE_ANY) {
982 for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
983 ofpbuf_put(&dns_answer, in_queryname, idx);
984 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_AAAA));
985 put_be16(&dns_answer, htons(DNS_CLASS_IN));
986 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
987 const struct in6_addr *ip6 = &ip_addrs.ipv6_addrs[i].addr;
988 put_be16(&dns_answer, htons(sizeof *ip6));
989 ofpbuf_put(&dns_answer, ip6, sizeof *ip6);
990 ancount++;
991 }
992 }
993
994 destroy_lport_addresses(&ip_addrs);
995
996 if (!ancount) {
997 ofpbuf_uninit(&dns_answer);
998 goto exit;
999 }
1000
1001 uint16_t new_l4_size = ntohs(in_udp->udp_len) + dns_answer.size;
1002 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
1003 struct dp_packet pkt_out;
1004 dp_packet_init(&pkt_out, new_packet_size);
1005 dp_packet_clear(&pkt_out);
1006 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
1007 pkt_out_ptr = &pkt_out;
1008
1009 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1010 dp_packet_put(
1011 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
1012
1013 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
1014 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
1015 pkt_out.l3_ofs = pkt_in->l3_ofs;
1016 pkt_out.l4_ofs = pkt_in->l4_ofs;
1017
1018 struct udp_header *out_udp = dp_packet_put(
1019 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
1020
1021 /* Copy the DNS header. */
1022 struct dns_header *out_dns_header = dp_packet_put(
1023 &pkt_out, dp_packet_pull(pkt_in, sizeof *out_dns_header),
1024 sizeof *out_dns_header);
1025
1026 /* Set the response bit to 1 in the flags. */
1027 out_dns_header->lo_flag |= 0x80;
1028
1029 /* Set the answer RR. */
1030 out_dns_header->ancount = htons(ancount);
1031
1032 /* Copy the Query section. */
1033 dp_packet_put(&pkt_out, dp_packet_data(pkt_in), dp_packet_size(pkt_in));
1034
1035 /* Copy the answer sections. */
1036 dp_packet_put(&pkt_out, dns_answer.data, dns_answer.size);
1037 ofpbuf_uninit(&dns_answer);
1038
1039 out_udp->udp_len = htons(new_l4_size);
1040 out_udp->udp_csum = 0;
1041
1042 struct eth_header *eth = dp_packet_data(&pkt_out);
1043 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1044 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
1045 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs
1046 + new_l4_size);
1047 /* Checksum needs to be initialized to zero. */
1048 out_ip->ip_csum = 0;
1049 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
1050 } else {
1051 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
1052 nh->ip6_plen = htons(new_l4_size);
1053
1054 /* IPv6 needs UDP checksum calculated */
1055 uint32_t csum;
1056 csum = packet_csum_pseudoheader6(nh);
1057 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
1058 ((const unsigned char *)out_udp -
1059 (const unsigned char *)eth));
1060 out_udp->udp_csum = csum_finish(csum);
1061 if (!out_udp->udp_csum) {
1062 out_udp->udp_csum = htons(0xffff);
1063 }
1064 }
1065
1066 pin->packet = dp_packet_data(&pkt_out);
1067 pin->packet_len = dp_packet_size(&pkt_out);
1068
1069 success = 1;
1070 exit:
1071 if (!ofperr) {
1072 union mf_subvalue sv;
1073 sv.u8_val = success;
1074 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
1075 }
1076 queue_msg(ofputil_encode_resume(pin, continuation, proto));
1077 dp_packet_uninit(pkt_out_ptr);
1078 }
1079
1080 static void
1081 process_packet_in(const struct ofp_header *msg, struct controller_ctx *ctx)
1082 {
1083 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1084
1085 struct ofputil_packet_in pin;
1086 struct ofpbuf continuation;
1087 enum ofperr error = ofputil_decode_packet_in(msg, true, NULL, NULL, &pin,
1088 NULL, NULL, &continuation);
1089
1090 if (error) {
1091 VLOG_WARN_RL(&rl, "error decoding packet-in: %s",
1092 ofperr_to_string(error));
1093 return;
1094 }
1095 if (pin.reason != OFPR_ACTION) {
1096 return;
1097 }
1098
1099 struct ofpbuf userdata = ofpbuf_const_initializer(pin.userdata,
1100 pin.userdata_len);
1101 const struct action_header *ah = ofpbuf_pull(&userdata, sizeof *ah);
1102 if (!ah) {
1103 VLOG_WARN_RL(&rl, "packet-in userdata lacks action header");
1104 return;
1105 }
1106
1107 struct dp_packet packet;
1108 dp_packet_use_const(&packet, pin.packet, pin.packet_len);
1109 struct flow headers;
1110 flow_extract(&packet, &headers);
1111
1112 switch (ntohl(ah->opcode)) {
1113 case ACTION_OPCODE_ARP:
1114 pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
1115 break;
1116
1117 case ACTION_OPCODE_PUT_ARP:
1118 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1119 true);
1120 break;
1121
1122 case ACTION_OPCODE_PUT_DHCP_OPTS:
1123 pinctrl_handle_put_dhcp_opts(&packet, &pin, &userdata, &continuation);
1124 break;
1125
1126 case ACTION_OPCODE_ND_NA:
1127 pinctrl_handle_nd_na(&headers, &pin.flow_metadata, &userdata);
1128 break;
1129
1130 case ACTION_OPCODE_PUT_ND:
1131 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1132 false);
1133 break;
1134
1135 case ACTION_OPCODE_PUT_DHCPV6_OPTS:
1136 pinctrl_handle_put_dhcpv6_opts(&packet, &pin, &userdata,
1137 &continuation);
1138 break;
1139
1140 case ACTION_OPCODE_DNS_LOOKUP:
1141 pinctrl_handle_dns_lookup(&packet, &pin, &userdata, &continuation, ctx);
1142 break;
1143
1144 case ACTION_OPCODE_LOG:
1145 handle_acl_log(&headers, &userdata);
1146 break;
1147
1148 case ACTION_OPCODE_PUT_ND_RA_OPTS:
1149 pinctrl_handle_put_nd_ra_opts(&headers, &packet, &pin, &userdata,
1150 &continuation);
1151 break;
1152
1153 case ACTION_OPCODE_ND_NS:
1154 pinctrl_handle_nd_ns(&headers, &pin.flow_metadata, &userdata);
1155 break;
1156
1157 case ACTION_OPCODE_ICMP4:
1158 pinctrl_handle_icmp4(&headers, &pin.flow_metadata, &userdata);
1159 break;
1160
1161 case ACTION_OPCODE_TCP_RESET:
1162 pinctrl_handle_tcp_reset(&headers, &packet, &pin.flow_metadata,
1163 &userdata);
1164 break;
1165
1166 default:
1167 VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
1168 ntohl(ah->opcode));
1169 break;
1170 }
1171 }
1172
1173 static void
1174 pinctrl_recv(const struct ofp_header *oh, enum ofptype type,
1175 struct controller_ctx *ctx)
1176 {
1177 if (type == OFPTYPE_ECHO_REQUEST) {
1178 queue_msg(ofputil_encode_echo_reply(oh));
1179 } else if (type == OFPTYPE_GET_CONFIG_REPLY) {
1180 /* Enable asynchronous messages */
1181 struct ofputil_switch_config config;
1182
1183 ofputil_decode_get_config_reply(oh, &config);
1184 config.miss_send_len = UINT16_MAX;
1185 set_switch_config(swconn, &config);
1186 } else if (type == OFPTYPE_PACKET_IN) {
1187 process_packet_in(oh, ctx);
1188 } else {
1189 if (VLOG_IS_DBG_ENABLED()) {
1190 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
1191
1192 char *s = ofp_to_string(oh, ntohs(oh->length), NULL, NULL, 2);
1193
1194 VLOG_DBG_RL(&rl, "OpenFlow packet ignored: %s", s);
1195 free(s);
1196 }
1197 }
1198 }
1199
1200 void
1201 pinctrl_run(struct controller_ctx *ctx,
1202 const struct ovsrec_bridge *br_int,
1203 const struct sbrec_chassis *chassis,
1204 const struct chassis_index *chassis_index,
1205 struct hmap *local_datapaths,
1206 struct sset *active_tunnels)
1207 {
1208 char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
1209 if (strcmp(target, rconn_get_target(swconn))) {
1210 VLOG_INFO("%s: connecting to switch", target);
1211 rconn_connect(swconn, target, target);
1212 }
1213 free(target);
1214
1215 rconn_run(swconn);
1216
1217 if (!rconn_is_connected(swconn)) {
1218 return;
1219 }
1220
1221 if (conn_seq_no != rconn_get_connection_seqno(swconn)) {
1222 pinctrl_setup();
1223 conn_seq_no = rconn_get_connection_seqno(swconn);
1224 flush_put_mac_bindings();
1225 }
1226
1227 /* Process a limited number of messages per call. */
1228 for (int i = 0; i < 50; i++) {
1229 struct ofpbuf *msg = rconn_recv(swconn);
1230 if (!msg) {
1231 break;
1232 }
1233
1234 const struct ofp_header *oh = msg->data;
1235 enum ofptype type;
1236
1237 ofptype_decode(&type, oh);
1238 pinctrl_recv(oh, type, ctx);
1239 ofpbuf_delete(msg);
1240 }
1241
1242 run_put_mac_bindings(ctx);
1243 send_garp_run(ctx, br_int, chassis, chassis_index, local_datapaths,
1244 active_tunnels);
1245 send_ipv6_ras(ctx, local_datapaths);
1246 }
1247
1248 /* Table of ipv6_ra_state structures, keyed on logical port name */
1249 static struct shash ipv6_ras;
1250
1251 /* Next IPV6 RA in seconds. */
1252 static long long int send_ipv6_ra_time;
1253
1254 struct ipv6_ra_config {
1255 time_t min_interval;
1256 time_t max_interval;
1257 struct eth_addr eth_src;
1258 struct eth_addr eth_dst;
1259 struct in6_addr ipv6_src;
1260 struct in6_addr ipv6_dst;
1261 int32_t mtu;
1262 uint8_t mo_flags; /* Managed/Other flags for RAs */
1263 uint8_t la_flags; /* On-link/autonomous flags for address prefixes */
1264 struct lport_addresses prefixes;
1265 };
1266
1267 struct ipv6_ra_state {
1268 long long int next_announce;
1269 struct ipv6_ra_config *config;
1270 int64_t port_key;
1271 int64_t metadata;
1272 bool delete_me;
1273 };
1274
1275 static void
1276 init_ipv6_ras(void)
1277 {
1278 shash_init(&ipv6_ras);
1279 send_ipv6_ra_time = LLONG_MAX;
1280 }
1281
1282 static void
1283 ipv6_ra_config_delete(struct ipv6_ra_config *config)
1284 {
1285 if (config) {
1286 destroy_lport_addresses(&config->prefixes);
1287 free(config);
1288 }
1289 }
1290
1291 static void
1292 ipv6_ra_delete(struct ipv6_ra_state *ra)
1293 {
1294 if (ra) {
1295 ipv6_ra_config_delete(ra->config);
1296 free(ra);
1297 }
1298 }
1299
1300 static void
1301 destroy_ipv6_ras(void)
1302 {
1303 struct shash_node *iter, *next;
1304 SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) {
1305 struct ipv6_ra_state *ra = iter->data;
1306 ipv6_ra_delete(ra);
1307 shash_delete(&ipv6_ras, iter);
1308 }
1309 shash_destroy(&ipv6_ras);
1310 }
1311
1312 static struct ipv6_ra_config *
1313 ipv6_ra_update_config(const struct sbrec_port_binding *pb)
1314 {
1315 struct ipv6_ra_config *config;
1316
1317 config = xzalloc(sizeof *config);
1318
1319 config->max_interval = smap_get_int(&pb->options, "ipv6_ra_max_interval",
1320 ND_RA_MAX_INTERVAL_DEFAULT);
1321 config->min_interval = smap_get_int(&pb->options, "ipv6_ra_min_interval",
1322 nd_ra_min_interval_default(config->max_interval));
1323 config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
1324 config->la_flags = ND_PREFIX_ON_LINK;
1325
1326 const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode");
1327 if (!address_mode) {
1328 VLOG_WARN("No address mode specified");
1329 goto fail;
1330 }
1331 if (!strcmp(address_mode, "dhcpv6_stateless")) {
1332 config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG;
1333 } else if (!strcmp(address_mode, "dhcpv6_stateful")) {
1334 config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG;
1335 } else if (!strcmp(address_mode, "slaac")) {
1336 config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS;
1337 } else {
1338 VLOG_WARN("Invalid address mode %s", address_mode);
1339 goto fail;
1340 }
1341
1342 const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes");
1343 if (prefixes && !extract_ip_addresses(prefixes, &config->prefixes)) {
1344 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes);
1345 goto fail;
1346 }
1347
1348 /* All nodes multicast addresses */
1349 config->eth_dst = (struct eth_addr) ETH_ADDR_C(33,33,00,00,00,01);
1350 ipv6_parse("ff02::1", &config->ipv6_dst);
1351
1352 const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth");
1353 if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src)) {
1354 VLOG_WARN("Invalid ethernet source %s", eth_addr);
1355 goto fail;
1356 }
1357 const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr");
1358 if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) {
1359 VLOG_WARN("Invalid IP source %s", ip_addr);
1360 goto fail;
1361 }
1362
1363 return config;
1364
1365 fail:
1366 ipv6_ra_config_delete(config);
1367 return NULL;
1368 }
1369
1370 static long long int
1371 ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
1372 {
1373 long long int min_interval_ms = min_interval * 1000LL;
1374 long long int max_interval_ms = max_interval * 1000LL;
1375
1376 return time_msec() + min_interval_ms +
1377 random_range(max_interval_ms - min_interval_ms);
1378 }
1379
1380 static void
1381 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
1382 struct ofpbuf *ofpacts)
1383 {
1384 struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
1385 mf_from_id(dst), NULL,
1386 NULL);
1387 ovs_be64 n_value = htonll(value);
1388 bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
1389 bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
1390 }
1391
1392 static long long int
1393 ipv6_ra_send(struct ipv6_ra_state *ra)
1394 {
1395 if (time_msec() < ra->next_announce) {
1396 return ra->next_announce;
1397 }
1398
1399 uint64_t packet_stub[128 / 8];
1400 struct dp_packet packet;
1401 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1402 compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst,
1403 &ra->config->ipv6_src, &ra->config->ipv6_dst,
1404 255, ra->config->mo_flags, 0, 0, 0, ra->config->mtu);
1405
1406 for (int i = 0; i < ra->config->prefixes.n_ipv6_addrs; i++) {
1407 ovs_be128 addr;
1408 memcpy(&addr, &ra->config->prefixes.ipv6_addrs[i].addr, sizeof addr);
1409 packet_put_ra_prefix_opt(&packet,
1410 ra->config->prefixes.ipv6_addrs[i].plen,
1411 ra->config->la_flags, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME),
1412 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr);
1413 }
1414
1415 uint64_t ofpacts_stub[4096 / 8];
1416 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1417
1418 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1419 uint32_t dp_key = ra->metadata;
1420 uint32_t port_key = ra->port_key;
1421 put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
1422 put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
1423 put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
1424 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
1425 resubmit->in_port = OFPP_CONTROLLER;
1426 resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
1427
1428 struct ofputil_packet_out po = {
1429 .packet = dp_packet_data(&packet),
1430 .packet_len = dp_packet_size(&packet),
1431 .buffer_id = UINT32_MAX,
1432 .ofpacts = ofpacts.data,
1433 .ofpacts_len = ofpacts.size,
1434 };
1435
1436 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1437 enum ofp_version version = rconn_get_version(swconn);
1438 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1439 queue_msg(ofputil_encode_packet_out(&po, proto));
1440 dp_packet_uninit(&packet);
1441 ofpbuf_uninit(&ofpacts);
1442
1443 ra->next_announce = ipv6_ra_calc_next_announce(ra->config->min_interval,
1444 ra->config->max_interval);
1445
1446 return ra->next_announce;
1447 }
1448
1449 static void
1450 ipv6_ra_wait(void)
1451 {
1452 poll_timer_wait_until(send_ipv6_ra_time);
1453 }
1454
1455 static void
1456 send_ipv6_ras(const struct controller_ctx *ctx, struct hmap *local_datapaths)
1457 {
1458 struct shash_node *iter, *iter_next;
1459
1460 send_ipv6_ra_time = LLONG_MAX;
1461
1462 SHASH_FOR_EACH (iter, &ipv6_ras) {
1463 struct ipv6_ra_state *ra = iter->data;
1464 ra->delete_me = true;
1465 }
1466
1467 const struct local_datapath *ld;
1468 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
1469 struct sbrec_port_binding *lpval;
1470 const struct sbrec_port_binding *pb;
1471 struct ovsdb_idl_index_cursor cursor;
1472
1473 lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
1474 &sbrec_table_port_binding);
1475 sbrec_port_binding_index_set_datapath(lpval, ld->datapath);
1476 ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, &sbrec_table_port_binding,
1477 "lport-by-datapath", &cursor);
1478 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
1479 if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic", false)) {
1480 continue;
1481 }
1482
1483 const char *peer_s = smap_get(&pb->options, "peer");
1484 if (!peer_s) {
1485 continue;
1486 }
1487
1488 const struct sbrec_port_binding *peer
1489 = lport_lookup_by_name(ctx->ovnsb_idl, peer_s);
1490 if (!peer) {
1491 continue;
1492 }
1493
1494 struct ipv6_ra_config *config = ipv6_ra_update_config(pb);
1495 if (!config) {
1496 continue;
1497 }
1498
1499 struct ipv6_ra_state *ra
1500 = shash_find_data(&ipv6_ras, pb->logical_port);
1501 if (!ra) {
1502 ra = xzalloc(sizeof *ra);
1503 ra->config = config;
1504 ra->next_announce = ipv6_ra_calc_next_announce(
1505 ra->config->min_interval,
1506 ra->config->max_interval);
1507 shash_add(&ipv6_ras, pb->logical_port, ra);
1508 } else {
1509 ipv6_ra_config_delete(ra->config);
1510 ra->config = config;
1511 }
1512
1513 /* Peer is the logical switch port that the logical
1514 * router port is connected to. The RA is injected
1515 * into that logical switch port.
1516 */
1517 ra->port_key = peer->tunnel_key;
1518 ra->metadata = peer->datapath->tunnel_key;
1519 ra->delete_me = false;
1520
1521 long long int next_ra = ipv6_ra_send(ra);
1522 if (send_ipv6_ra_time > next_ra) {
1523 send_ipv6_ra_time = next_ra;
1524 }
1525 }
1526 sbrec_port_binding_index_destroy_row(lpval);
1527 }
1528
1529 /* Remove those that are no longer in the SB database */
1530 SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) {
1531 struct ipv6_ra_state *ra = iter->data;
1532 if (ra->delete_me) {
1533 shash_delete(&ipv6_ras, iter);
1534 ipv6_ra_delete(ra);
1535 }
1536 }
1537 }
1538
1539 void
1540 pinctrl_wait(struct controller_ctx *ctx)
1541 {
1542 wait_put_mac_bindings(ctx);
1543 rconn_run_wait(swconn);
1544 rconn_recv_wait(swconn);
1545 send_garp_wait();
1546 ipv6_ra_wait();
1547 }
1548
1549 void
1550 pinctrl_destroy(void)
1551 {
1552 rconn_destroy(swconn);
1553 destroy_put_mac_bindings();
1554 destroy_send_garps();
1555 destroy_ipv6_ras();
1556 }
1557 \f
1558 /* Implementation of the "put_arp" and "put_nd" OVN actions. These
1559 * actions send a packet to ovn-controller, using the flow as an API
1560 * (see actions.h for details). This code implements the actions by
1561 * updating the MAC_Binding table in the southbound database.
1562 *
1563 * This code could be a lot simpler if the database could always be updated,
1564 * but in fact we can only update it when ctx->ovnsb_idl_txn is nonnull. Thus,
1565 * we buffer up a few put_mac_bindings (but we don't keep them longer
1566 * than 1 second) and apply them whenever a database transaction is
1567 * available. */
1568
1569 /* Buffered "put_mac_binding" operation. */
1570 struct put_mac_binding {
1571 struct hmap_node hmap_node; /* In 'put_mac_bindings'. */
1572
1573 long long int timestamp; /* In milliseconds. */
1574
1575 /* Key. */
1576 uint32_t dp_key;
1577 uint32_t port_key;
1578 char ip_s[INET6_ADDRSTRLEN + 1];
1579
1580 /* Value. */
1581 struct eth_addr mac;
1582 };
1583
1584 /* Contains "struct put_mac_binding"s. */
1585 static struct hmap put_mac_bindings;
1586
1587 static void
1588 init_put_mac_bindings(void)
1589 {
1590 hmap_init(&put_mac_bindings);
1591 }
1592
1593 static void
1594 destroy_put_mac_bindings(void)
1595 {
1596 flush_put_mac_bindings();
1597 hmap_destroy(&put_mac_bindings);
1598 }
1599
1600 static struct put_mac_binding *
1601 pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key,
1602 const char *ip_s, uint32_t hash)
1603 {
1604 struct put_mac_binding *pa;
1605 HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) {
1606 if (pa->dp_key == dp_key
1607 && pa->port_key == port_key
1608 && !strcmp(pa->ip_s, ip_s)) {
1609 return pa;
1610 }
1611 }
1612 return NULL;
1613 }
1614
1615 static void
1616 pinctrl_handle_put_mac_binding(const struct flow *md,
1617 const struct flow *headers, bool is_arp)
1618 {
1619 uint32_t dp_key = ntohll(md->metadata);
1620 uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
1621 char ip_s[INET6_ADDRSTRLEN];
1622
1623 if (is_arp) {
1624 ovs_be32 ip = htonl(md->regs[0]);
1625 inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s));
1626 } else {
1627 ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
1628 inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s));
1629 }
1630 uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
1631 struct put_mac_binding *pmb
1632 = pinctrl_find_put_mac_binding(dp_key, port_key, ip_s, hash);
1633 if (!pmb) {
1634 if (hmap_count(&put_mac_bindings) >= 1000) {
1635 COVERAGE_INC(pinctrl_drop_put_mac_binding);
1636 return;
1637 }
1638
1639 pmb = xmalloc(sizeof *pmb);
1640 hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash);
1641 pmb->dp_key = dp_key;
1642 pmb->port_key = port_key;
1643 ovs_strlcpy_arrays(pmb->ip_s, ip_s);
1644 }
1645 pmb->timestamp = time_msec();
1646 pmb->mac = headers->dl_src;
1647 }
1648
1649 static void
1650 run_put_mac_binding(struct controller_ctx *ctx,
1651 const struct put_mac_binding *pmb)
1652 {
1653 if (time_msec() > pmb->timestamp + 1000) {
1654 return;
1655 }
1656
1657 /* Convert logical datapath and logical port key into lport. */
1658 const struct sbrec_port_binding *pb
1659 = lport_lookup_by_key(ctx->ovnsb_idl, pmb->dp_key, pmb->port_key);
1660 if (!pb) {
1661 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1662
1663 VLOG_WARN_RL(&rl, "unknown logical port with datapath %"PRIu32" "
1664 "and port %"PRIu32, pmb->dp_key, pmb->port_key);
1665 return;
1666 }
1667
1668 /* Convert ethernet argument to string form for database. */
1669 char mac_string[ETH_ADDR_STRLEN + 1];
1670 snprintf(mac_string, sizeof mac_string,
1671 ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac));
1672
1673 /* Check for an update an existing IP-MAC binding for this logical
1674 * port.
1675 *
1676 * XXX This is not very efficient. */
1677 const struct sbrec_mac_binding *b;
1678 SBREC_MAC_BINDING_FOR_EACH (b, ctx->ovnsb_idl) {
1679 if (!strcmp(b->logical_port, pb->logical_port)
1680 && !strcmp(b->ip, pmb->ip_s)) {
1681 if (strcmp(b->mac, mac_string)) {
1682 sbrec_mac_binding_set_mac(b, mac_string);
1683 }
1684 return;
1685 }
1686 }
1687
1688 /* Add new IP-MAC binding for this logical port. */
1689 b = sbrec_mac_binding_insert(ctx->ovnsb_idl_txn);
1690 sbrec_mac_binding_set_logical_port(b, pb->logical_port);
1691 sbrec_mac_binding_set_ip(b, pmb->ip_s);
1692 sbrec_mac_binding_set_mac(b, mac_string);
1693 sbrec_mac_binding_set_datapath(b, pb->datapath);
1694 }
1695
1696 static void
1697 run_put_mac_bindings(struct controller_ctx *ctx)
1698 {
1699 if (!ctx->ovnsb_idl_txn) {
1700 return;
1701 }
1702
1703 const struct put_mac_binding *pmb;
1704 HMAP_FOR_EACH (pmb, hmap_node, &put_mac_bindings) {
1705 run_put_mac_binding(ctx, pmb);
1706 }
1707 flush_put_mac_bindings();
1708 }
1709
1710 static void
1711 wait_put_mac_bindings(struct controller_ctx *ctx)
1712 {
1713 if (ctx->ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
1714 poll_immediate_wake();
1715 }
1716 }
1717
1718 static void
1719 flush_put_mac_bindings(void)
1720 {
1721 struct put_mac_binding *pmb;
1722 HMAP_FOR_EACH_POP (pmb, hmap_node, &put_mac_bindings) {
1723 free(pmb);
1724 }
1725 }
1726 \f
1727 /*
1728 * Send gratuitous ARP for vif on localnet.
1729 *
1730 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
1731 * the port's mac,ip mapping. On localnet, such announcements are needed for
1732 * switches and routers on the broadcast segment to update their port-mac
1733 * and ARP tables.
1734 */
1735 struct garp_data {
1736 struct eth_addr ea; /* Ethernet address of port. */
1737 ovs_be32 ipv4; /* Ipv4 address of port. */
1738 long long int announce_time; /* Next announcement in ms. */
1739 int backoff; /* Backoff for the next announcement. */
1740 ofp_port_t ofport; /* ofport used to output this GARP. */
1741 int tag; /* VLAN tag of this GARP packet, or -1. */
1742 };
1743
1744 /* Contains GARPs to be sent. */
1745 static struct shash send_garp_data;
1746
1747 /* Next GARP announcement in ms. */
1748 static long long int send_garp_time;
1749
1750 static void
1751 init_send_garps(void)
1752 {
1753 shash_init(&send_garp_data);
1754 send_garp_time = LLONG_MAX;
1755 }
1756
1757 static void
1758 destroy_send_garps(void)
1759 {
1760 shash_destroy_free_data(&send_garp_data);
1761 }
1762
1763 static void
1764 add_garp(const char *name, ofp_port_t ofport, int tag,
1765 const struct eth_addr ea, ovs_be32 ip)
1766 {
1767 struct garp_data *garp = xmalloc(sizeof *garp);
1768 garp->ea = ea;
1769 garp->ipv4 = ip;
1770 garp->announce_time = time_msec() + 1000;
1771 garp->backoff = 1;
1772 garp->ofport = ofport;
1773 garp->tag = tag;
1774 shash_add(&send_garp_data, name, garp);
1775 }
1776
1777 /* Add or update a vif for which GARPs need to be announced. */
1778 static void
1779 send_garp_update(const struct sbrec_port_binding *binding_rec,
1780 struct simap *localnet_ofports, struct hmap *local_datapaths,
1781 struct shash *nat_addresses)
1782 {
1783 /* Find the localnet ofport to send this GARP. */
1784 struct local_datapath *ld
1785 = get_local_datapath(local_datapaths,
1786 binding_rec->datapath->tunnel_key);
1787 if (!ld || !ld->localnet_port) {
1788 return;
1789 }
1790 ofp_port_t ofport = u16_to_ofp(simap_get(localnet_ofports,
1791 ld->localnet_port->logical_port));
1792 int tag = ld->localnet_port->n_tag ? *ld->localnet_port->tag : -1;
1793
1794 volatile struct garp_data *garp = NULL;
1795 /* Update GARP for NAT IP if it exists. Consider port bindings with type
1796 * "l3gateway" for logical switch ports attached to gateway routers, and
1797 * port bindings with type "patch" for logical switch ports attached to
1798 * distributed gateway ports. */
1799 if (!strcmp(binding_rec->type, "l3gateway")
1800 || !strcmp(binding_rec->type, "patch")) {
1801 struct lport_addresses *laddrs = NULL;
1802 while ((laddrs = shash_find_and_delete(nat_addresses,
1803 binding_rec->logical_port))) {
1804 int i;
1805 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
1806 char *name = xasprintf("%s-%s", binding_rec->logical_port,
1807 laddrs->ipv4_addrs[i].addr_s);
1808 garp = shash_find_data(&send_garp_data, name);
1809 if (garp) {
1810 garp->ofport = ofport;
1811 garp->tag = tag;
1812 } else {
1813 add_garp(name, ofport, tag, laddrs->ea,
1814 laddrs->ipv4_addrs[i].addr);
1815 }
1816 free(name);
1817 }
1818 destroy_lport_addresses(laddrs);
1819 free(laddrs);
1820 }
1821 return;
1822 }
1823
1824 /* Update GARP for vif if it exists. */
1825 garp = shash_find_data(&send_garp_data, binding_rec->logical_port);
1826 if (garp) {
1827 garp->ofport = ofport;
1828 return;
1829 }
1830
1831 /* Add GARP for new vif. */
1832 int i;
1833 for (i = 0; i < binding_rec->n_mac; i++) {
1834 struct lport_addresses laddrs;
1835 if (!extract_lsp_addresses(binding_rec->mac[i], &laddrs)
1836 || !laddrs.n_ipv4_addrs) {
1837 continue;
1838 }
1839
1840 add_garp(binding_rec->logical_port, ofport, tag,
1841 laddrs.ea, laddrs.ipv4_addrs[0].addr);
1842
1843 destroy_lport_addresses(&laddrs);
1844 break;
1845 }
1846 }
1847
1848 /* Remove a vif from GARP announcements. */
1849 static void
1850 send_garp_delete(const char *lport)
1851 {
1852 struct garp_data *garp = shash_find_and_delete(&send_garp_data, lport);
1853 free(garp);
1854 }
1855
1856 static long long int
1857 send_garp(struct garp_data *garp, long long int current_time)
1858 {
1859 if (current_time < garp->announce_time) {
1860 return garp->announce_time;
1861 }
1862
1863 /* Compose a GARP request packet. */
1864 uint64_t packet_stub[128 / 8];
1865 struct dp_packet packet;
1866 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1867 compose_arp(&packet, ARP_OP_REQUEST, garp->ea, eth_addr_zero,
1868 true, garp->ipv4, garp->ipv4);
1869
1870 /* Compose a GARP request packet's vlan if exist. */
1871 if (garp->tag >= 0) {
1872 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN), htons(garp->tag));
1873 }
1874
1875 /* Compose actions. The garp request is output on localnet ofport. */
1876 uint64_t ofpacts_stub[4096 / 8];
1877 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1878 enum ofp_version version = rconn_get_version(swconn);
1879 ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
1880
1881 struct ofputil_packet_out po = {
1882 .packet = dp_packet_data(&packet),
1883 .packet_len = dp_packet_size(&packet),
1884 .buffer_id = UINT32_MAX,
1885 .ofpacts = ofpacts.data,
1886 .ofpacts_len = ofpacts.size,
1887 };
1888 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1889 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1890 queue_msg(ofputil_encode_packet_out(&po, proto));
1891 dp_packet_uninit(&packet);
1892 ofpbuf_uninit(&ofpacts);
1893
1894 /* Set the next announcement. At most 5 announcements are sent for a
1895 * vif. */
1896 if (garp->backoff < 16) {
1897 garp->backoff *= 2;
1898 garp->announce_time = current_time + garp->backoff * 1000;
1899 } else {
1900 garp->announce_time = LLONG_MAX;
1901 }
1902 return garp->announce_time;
1903 }
1904
1905 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
1906 static void
1907 get_localnet_vifs_l3gwports(struct controller_ctx *ctx,
1908 const struct ovsrec_bridge *br_int,
1909 const struct sbrec_chassis *chassis,
1910 struct hmap *local_datapaths,
1911 struct sset *localnet_vifs,
1912 struct simap *localnet_ofports,
1913 struct sset *local_l3gw_ports)
1914 {
1915 for (int i = 0; i < br_int->n_ports; i++) {
1916 const struct ovsrec_port *port_rec = br_int->ports[i];
1917 if (!strcmp(port_rec->name, br_int->name)) {
1918 continue;
1919 }
1920 const char *chassis_id = smap_get(&port_rec->external_ids,
1921 "ovn-chassis-id");
1922 if (chassis_id && !strcmp(chassis_id, chassis->name)) {
1923 continue;
1924 }
1925 const char *localnet = smap_get(&port_rec->external_ids,
1926 "ovn-localnet-port");
1927 for (int j = 0; j < port_rec->n_interfaces; j++) {
1928 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
1929 if (!iface_rec->n_ofport) {
1930 continue;
1931 }
1932 /* Get localnet port with its ofport. */
1933 if (localnet) {
1934 int64_t ofport = iface_rec->ofport[0];
1935 if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
1936 continue;
1937 }
1938 simap_put(localnet_ofports, localnet, ofport);
1939 continue;
1940 }
1941 /* Get localnet vif. */
1942 const char *iface_id = smap_get(&iface_rec->external_ids,
1943 "iface-id");
1944 if (!iface_id) {
1945 continue;
1946 }
1947 const struct sbrec_port_binding *pb
1948 = lport_lookup_by_name(ctx->ovnsb_idl, iface_id);
1949 if (!pb) {
1950 continue;
1951 }
1952 struct local_datapath *ld
1953 = get_local_datapath(local_datapaths,
1954 pb->datapath->tunnel_key);
1955 if (ld && ld->localnet_port) {
1956 sset_add(localnet_vifs, iface_id);
1957 }
1958 }
1959 }
1960
1961 const struct local_datapath *ld;
1962 struct ovsdb_idl_index_cursor cursor;
1963 struct sbrec_port_binding *lpval;
1964 lpval = sbrec_port_binding_index_init_row(ctx->ovnsb_idl,
1965 &sbrec_table_port_binding);
1966 ovsdb_idl_initialize_cursor(ctx->ovnsb_idl, &sbrec_table_port_binding,
1967 "lport-by-datapath", &cursor);
1968 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
1969 const struct sbrec_port_binding *pb;
1970
1971 if (!ld->localnet_port) {
1972 continue;
1973 }
1974
1975 /* Get l3gw ports. Consider port bindings with type "l3gateway"
1976 * that connect to gateway routers (if local), and consider port
1977 * bindings of type "patch" since they might connect to
1978 * distributed gateway ports with NAT addresses. */
1979
1980 sbrec_port_binding_index_set_datapath(lpval, ld->datapath);
1981
1982 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
1983 if ((ld->has_local_l3gateway && !strcmp(pb->type, "l3gateway"))
1984 || !strcmp(pb->type, "patch")) {
1985 sset_add(local_l3gw_ports, pb->logical_port);
1986 }
1987 }
1988 }
1989 sbrec_port_binding_index_destroy_row(lpval);
1990 }
1991
1992 static bool
1993 pinctrl_is_chassis_resident(struct controller_ctx *ctx,
1994 const struct sbrec_chassis *chassis,
1995 const struct chassis_index *chassis_index,
1996 struct sset *active_tunnels,
1997 const char *port_name)
1998 {
1999 const struct sbrec_port_binding *pb
2000 = lport_lookup_by_name(ctx->ovnsb_idl, port_name);
2001 if (!pb || !pb->chassis) {
2002 return false;
2003 }
2004 if (strcmp(pb->type, "chassisredirect")) {
2005 return pb->chassis == chassis;
2006 } else {
2007 struct ovs_list *gateway_chassis =
2008 gateway_chassis_get_ordered(pb, chassis_index);
2009 bool active = gateway_chassis_is_active(gateway_chassis,
2010 chassis,
2011 active_tunnels);
2012 gateway_chassis_destroy(gateway_chassis);
2013 return active;
2014 }
2015 }
2016
2017 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2018 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2019 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2020 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2021 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2022 *
2023 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2024 *
2025 * The caller must call destroy_lport_addresses() and free(*lport). */
2026 static bool
2027 extract_addresses_with_port(const char *addresses,
2028 struct lport_addresses *laddrs,
2029 char **lport)
2030 {
2031 int ofs;
2032 if (!extract_addresses(addresses, laddrs, &ofs)) {
2033 return false;
2034 } else if (ofs >= strlen(addresses)) {
2035 return true;
2036 }
2037
2038 struct lexer lexer;
2039 lexer_init(&lexer, addresses + ofs);
2040 lexer_get(&lexer);
2041
2042 if (lexer.error || lexer.token.type != LEX_T_ID
2043 || !lexer_match_id(&lexer, "is_chassis_resident")) {
2044 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2045 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
2046 lexer_destroy(&lexer);
2047 return true;
2048 }
2049
2050 if (!lexer_match(&lexer, LEX_T_LPAREN)) {
2051 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2052 VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
2053 "'is_chassis_resident' in address '%s'", addresses);
2054 lexer_destroy(&lexer);
2055 return false;
2056 }
2057
2058 if (lexer.token.type != LEX_T_STRING) {
2059 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2060 VLOG_INFO_RL(&rl,
2061 "Syntax error: expecting quoted string after"
2062 " 'is_chassis_resident' in address '%s'", addresses);
2063 lexer_destroy(&lexer);
2064 return false;
2065 }
2066
2067 *lport = xstrdup(lexer.token.s);
2068
2069 lexer_get(&lexer);
2070 if (!lexer_match(&lexer, LEX_T_RPAREN)) {
2071 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2072 VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted string in "
2073 "'is_chassis_resident()' in address '%s'",
2074 addresses);
2075 lexer_destroy(&lexer);
2076 return false;
2077 }
2078
2079 lexer_destroy(&lexer);
2080 return true;
2081 }
2082
2083 static void
2084 consider_nat_address(struct controller_ctx *ctx,
2085 const char *nat_address,
2086 const struct sbrec_port_binding *pb,
2087 struct sset *nat_address_keys,
2088 const struct sbrec_chassis *chassis,
2089 const struct chassis_index *chassis_index,
2090 struct sset *active_tunnels,
2091 struct shash *nat_addresses)
2092 {
2093 struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
2094 char *lport = NULL;
2095 if (!extract_addresses_with_port(nat_address, laddrs, &lport)
2096 || (!lport && !strcmp(pb->type, "patch"))
2097 || (lport && !pinctrl_is_chassis_resident(
2098 ctx, chassis, chassis_index, active_tunnels, lport))) {
2099 destroy_lport_addresses(laddrs);
2100 free(laddrs);
2101 free(lport);
2102 return;
2103 }
2104 free(lport);
2105
2106 int i;
2107 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
2108 char *name = xasprintf("%s-%s", pb->logical_port,
2109 laddrs->ipv4_addrs[i].addr_s);
2110 sset_add(nat_address_keys, name);
2111 free(name);
2112 }
2113 shash_add(nat_addresses, pb->logical_port, laddrs);
2114 }
2115
2116 static void
2117 get_nat_addresses_and_keys(struct controller_ctx *ctx,
2118 struct sset *nat_address_keys,
2119 struct sset *local_l3gw_ports,
2120 const struct sbrec_chassis *chassis,
2121 const struct chassis_index *chassis_index,
2122 struct sset *active_tunnels,
2123 struct shash *nat_addresses)
2124 {
2125 const char *gw_port;
2126 SSET_FOR_EACH(gw_port, local_l3gw_ports) {
2127 const struct sbrec_port_binding *pb;
2128
2129 pb = lport_lookup_by_name(ctx->ovnsb_idl, gw_port);
2130 if (!pb) {
2131 continue;
2132 }
2133
2134 if (pb->n_nat_addresses) {
2135 for (int i = 0; i < pb->n_nat_addresses; i++) {
2136 consider_nat_address(ctx, pb->nat_addresses[i], pb,
2137 nat_address_keys, chassis,
2138 chassis_index, active_tunnels,
2139 nat_addresses);
2140 }
2141 } else {
2142 /* Continue to support options:nat-addresses for version
2143 * upgrade. */
2144 const char *nat_addresses_options = smap_get(&pb->options,
2145 "nat-addresses");
2146 if (nat_addresses_options) {
2147 consider_nat_address(ctx, nat_addresses_options, pb,
2148 nat_address_keys, chassis,
2149 chassis_index, active_tunnels,
2150 nat_addresses);
2151 }
2152 }
2153 }
2154 }
2155
2156 static void
2157 send_garp_wait(void)
2158 {
2159 poll_timer_wait_until(send_garp_time);
2160 }
2161
2162 static void
2163 send_garp_run(struct controller_ctx *ctx,
2164 const struct ovsrec_bridge *br_int,
2165 const struct sbrec_chassis *chassis,
2166 const struct chassis_index *chassis_index,
2167 struct hmap *local_datapaths,
2168 struct sset *active_tunnels)
2169 {
2170 struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
2171 struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
2172 struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
2173 struct simap localnet_ofports = SIMAP_INITIALIZER(&localnet_ofports);
2174 struct shash nat_addresses;
2175
2176 shash_init(&nat_addresses);
2177
2178 get_localnet_vifs_l3gwports(ctx, br_int, chassis, local_datapaths,
2179 &localnet_vifs, &localnet_ofports, &local_l3gw_ports);
2180
2181 get_nat_addresses_and_keys(ctx, &nat_ip_keys, &local_l3gw_ports,
2182 chassis, chassis_index, active_tunnels,
2183 &nat_addresses);
2184 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2185 struct shash_node *iter, *next;
2186 SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
2187 if (!sset_contains(&localnet_vifs, iter->name) &&
2188 !sset_contains(&nat_ip_keys, iter->name)) {
2189 send_garp_delete(iter->name);
2190 }
2191 }
2192
2193 /* Update send_garp_data. */
2194 const char *iface_id;
2195 SSET_FOR_EACH (iface_id, &localnet_vifs) {
2196 const struct sbrec_port_binding *pb;
2197
2198 pb = lport_lookup_by_name(ctx->ovnsb_idl, iface_id);
2199 if (pb) {
2200 send_garp_update(pb, &localnet_ofports, local_datapaths,
2201 &nat_addresses);
2202 }
2203 }
2204
2205 /* Update send_garp_data for nat-addresses. */
2206 const char *gw_port;
2207 SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
2208 const struct sbrec_port_binding *pb;
2209
2210 pb = lport_lookup_by_name(ctx->ovnsb_idl, gw_port);
2211 if (pb) {
2212 send_garp_update(pb, &localnet_ofports, local_datapaths,
2213 &nat_addresses);
2214 }
2215 }
2216
2217 /* Send GARPs, and update the next announcement. */
2218 long long int current_time = time_msec();
2219 send_garp_time = LLONG_MAX;
2220 SHASH_FOR_EACH (iter, &send_garp_data) {
2221 long long int next_announce = send_garp(iter->data, current_time);
2222 if (send_garp_time > next_announce) {
2223 send_garp_time = next_announce;
2224 }
2225 }
2226 sset_destroy(&localnet_vifs);
2227 sset_destroy(&local_l3gw_ports);
2228 simap_destroy(&localnet_ofports);
2229
2230 SHASH_FOR_EACH_SAFE (iter, next, &nat_addresses) {
2231 struct lport_addresses *laddrs = iter->data;
2232 destroy_lport_addresses(laddrs);
2233 shash_delete(&nat_addresses, iter);
2234 free(laddrs);
2235 }
2236 shash_destroy(&nat_addresses);
2237
2238 sset_destroy(&nat_ip_keys);
2239 }
2240
2241 static void
2242 reload_metadata(struct ofpbuf *ofpacts, const struct match *md)
2243 {
2244 enum mf_field_id md_fields[] = {
2245 #if FLOW_N_REGS == 16
2246 MFF_REG0,
2247 MFF_REG1,
2248 MFF_REG2,
2249 MFF_REG3,
2250 MFF_REG4,
2251 MFF_REG5,
2252 MFF_REG6,
2253 MFF_REG7,
2254 MFF_REG8,
2255 MFF_REG9,
2256 MFF_REG10,
2257 MFF_REG11,
2258 MFF_REG12,
2259 MFF_REG13,
2260 MFF_REG14,
2261 MFF_REG15,
2262 #else
2263 #error
2264 #endif
2265 MFF_METADATA,
2266 };
2267 for (size_t i = 0; i < ARRAY_SIZE(md_fields); i++) {
2268 const struct mf_field *field = mf_from_id(md_fields[i]);
2269 if (!mf_is_all_wild(field, &md->wc)) {
2270 union mf_value value;
2271 mf_get_value(field, &md->flow, &value);
2272 ofpact_put_set_field(ofpacts, field, &value, NULL);
2273 }
2274 }
2275 }
2276
2277 static void
2278 pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md,
2279 struct ofpbuf *userdata)
2280 {
2281 /* This action only works for IPv6 ND packets, and the switch should only
2282 * send us ND packets this way, but check here just to be sure. */
2283 if (!is_nd(ip_flow, NULL)) {
2284 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2285 VLOG_WARN_RL(&rl, "NA action on non-ND packet");
2286 return;
2287 }
2288
2289 uint64_t packet_stub[128 / 8];
2290 struct dp_packet packet;
2291 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2292
2293 /* xxx These flags are not exactly correct. Look at section 7.2.4
2294 * xxx of RFC 4861. For example, we need to set ND_RSO_ROUTER for
2295 * xxx router's interfaces and ND_RSO_SOLICITED only if it was
2296 * xxx requested. */
2297 compose_nd_na(&packet, ip_flow->dl_dst, ip_flow->dl_src,
2298 &ip_flow->nd_target, &ip_flow->ipv6_src,
2299 htonl(ND_RSO_SOLICITED | ND_RSO_OVERRIDE));
2300
2301 /* Reload previous packet metadata and set actions from userdata. */
2302 set_actions_and_enqueue_msg(&packet, md, userdata);
2303 dp_packet_uninit(&packet);
2304 }
2305
2306 static void
2307 pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
2308 struct ofpbuf *userdata)
2309 {
2310 /* This action only works for IPv6 packets. */
2311 if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) {
2312 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2313 VLOG_WARN_RL(&rl, "NS action on non-IPv6 packet");
2314 return;
2315 }
2316
2317 uint64_t packet_stub[128 / 8];
2318 struct dp_packet packet;
2319 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2320
2321 compose_nd_ns(&packet, ip_flow->dl_src, &ip_flow->ipv6_src,
2322 &ip_flow->ipv6_dst);
2323
2324 /* Reload previous packet metadata and set actions from userdata. */
2325 set_actions_and_enqueue_msg(&packet, md, userdata);
2326 dp_packet_uninit(&packet);
2327 }
2328
2329 static void
2330 pinctrl_handle_put_nd_ra_opts(
2331 const struct flow *in_flow, struct dp_packet *pkt_in,
2332 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
2333 struct ofpbuf *continuation)
2334 {
2335 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2336 enum ofp_version version = rconn_get_version(swconn);
2337 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
2338 struct dp_packet *pkt_out_ptr = NULL;
2339 uint32_t success = 0;
2340
2341 /* Parse result field. */
2342 const struct mf_field *f;
2343 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
2344 if (ofperr) {
2345 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
2346 goto exit;
2347 }
2348
2349 /* Parse result offset. */
2350 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
2351 if (!ofsp) {
2352 VLOG_WARN_RL(&rl, "offset not present in the userdata");
2353 goto exit;
2354 }
2355
2356 /* Check that the result is valid and writable. */
2357 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
2358 ofperr = mf_check_dst(&dst, NULL);
2359 if (ofperr) {
2360 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
2361 goto exit;
2362 }
2363
2364 if (!userdata->size) {
2365 VLOG_WARN_RL(&rl, "IPv6 ND RA options not present in the userdata");
2366 goto exit;
2367 }
2368
2369 if (!is_icmpv6(in_flow, NULL) || in_flow->tp_dst != htons(0) ||
2370 in_flow->tp_src != htons(ND_ROUTER_SOLICIT)) {
2371 VLOG_WARN_RL(&rl, "put_nd_ra action on invalid or unsupported packet");
2372 goto exit;
2373 }
2374
2375 size_t new_packet_size = pkt_in->l4_ofs + userdata->size;
2376 struct dp_packet pkt_out;
2377 dp_packet_init(&pkt_out, new_packet_size);
2378 dp_packet_clear(&pkt_out);
2379 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
2380 pkt_out_ptr = &pkt_out;
2381
2382 /* Copy L2 and L3 headers from pkt_in. */
2383 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
2384 pkt_in->l4_ofs);
2385
2386 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
2387 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
2388 pkt_out.l3_ofs = pkt_in->l3_ofs;
2389 pkt_out.l4_ofs = pkt_in->l4_ofs;
2390
2391 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2392 dp_packet_put(&pkt_out, userdata->data, userdata->size);
2393
2394 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2395 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
2396 nh->ip6_plen = htons(userdata->size);
2397 struct ovs_ra_msg *ra = dp_packet_l4(&pkt_out);
2398 ra->icmph.icmp6_cksum = 0;
2399 uint32_t icmp_csum = packet_csum_pseudoheader6(nh);
2400 ra->icmph.icmp6_cksum = csum_finish(csum_continue(
2401 icmp_csum, ra, userdata->size));
2402 pin->packet = dp_packet_data(&pkt_out);
2403 pin->packet_len = dp_packet_size(&pkt_out);
2404 success = 1;
2405
2406 exit:
2407 if (!ofperr) {
2408 union mf_subvalue sv;
2409 sv.u8_val = success;
2410 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
2411 }
2412 queue_msg(ofputil_encode_resume(pin, continuation, proto));
2413 dp_packet_uninit(pkt_out_ptr);
2414 }