]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/controller/pinctrl.c
ovsdb-idl: Redesign use of indexes.
[mirror_ovs.git] / ovn / controller / pinctrl.c
1 /* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "pinctrl.h"
20
21 #include "coverage.h"
22 #include "csum.h"
23 #include "dirs.h"
24 #include "dp-packet.h"
25 #include "flow.h"
26 #include "gchassis.h"
27 #include "lport.h"
28 #include "nx-match.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
31 #include "lib/sset.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
39
40 #include "lib/dhcp.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
43 #include "ovn/lex.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
51 #include "timeval.h"
52 #include "vswitch-idl.h"
53 #include "lflow.h"
54
55 VLOG_DEFINE_THIS_MODULE(pinctrl);
56
57 /* OpenFlow connection to the switch. */
58 static struct rconn *swconn;
59
60 /* Last seen sequence number for 'swconn'. When this differs from
61 * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
62 static unsigned int conn_seq_no;
63
64 static void pinctrl_handle_put_mac_binding(const struct flow *md,
65 const struct flow *headers,
66 bool is_arp);
67 static void init_put_mac_bindings(void);
68 static void destroy_put_mac_bindings(void);
69 static void run_put_mac_bindings(
70 struct controller_ctx *,
71 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
72 struct ovsdb_idl_index *sbrec_port_binding_by_key,
73 const struct sbrec_mac_binding_table *);
74 static void wait_put_mac_bindings(struct controller_ctx *);
75 static void flush_put_mac_bindings(void);
76
77 static void init_send_garps(void);
78 static void destroy_send_garps(void);
79 static void send_garp_wait(void);
80 static void send_garp_run(
81 struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
82 struct ovsdb_idl_index *sbrec_port_binding_by_name,
83 const struct ovsrec_bridge *,
84 const struct sbrec_chassis *,
85 const struct chassis_index *chassis_index,
86 const struct hmap *local_datapaths,
87 const struct sset *active_tunnels);
88 static void pinctrl_handle_nd_na(const struct flow *ip_flow,
89 const struct match *md,
90 struct ofpbuf *userdata,
91 bool is_router);
92 static void reload_metadata(struct ofpbuf *ofpacts,
93 const struct match *md);
94 static void pinctrl_handle_put_nd_ra_opts(
95 const struct flow *ip_flow, struct dp_packet *pkt_in,
96 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
97 struct ofpbuf *continuation);
98 static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
99 const struct match *md,
100 struct ofpbuf *userdata);
101 static void init_ipv6_ras(void);
102 static void destroy_ipv6_ras(void);
103 static void ipv6_ra_wait(void);
/* Forward declaration; the definition is elsewhere in this file. */
static void send_ipv6_ras(
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    const struct hmap *local_datapaths);
109
110 COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
111
112 void
113 pinctrl_init(void)
114 {
115 swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
116 conn_seq_no = 0;
117 init_put_mac_bindings();
118 init_send_garps();
119 init_ipv6_ras();
120 }
121
122 static ovs_be32
123 queue_msg(struct ofpbuf *msg)
124 {
125 const struct ofp_header *oh = msg->data;
126 ovs_be32 xid = oh->xid;
127
128 rconn_send(swconn, msg, NULL);
129 return xid;
130 }
131
132 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
133 static void
134 pinctrl_setup(void)
135 {
136 /* Fetch the switch configuration. The response later will allow us to
137 * change the miss_send_len to UINT16_MAX, so that we can enable
138 * asynchronous messages. */
139 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
140 rconn_get_version(swconn), 0));
141
142 /* Set a packet-in format that supports userdata. */
143 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
144 OFPUTIL_PACKET_IN_NXT2));
145 }
146
/* Encodes 'config' as a SET_CONFIG request using the OpenFlow version
 * negotiated on 'swconn_' and queues it with queue_msg().
 *
 * Note that queue_msg() always sends on the global 'swconn'; 'swconn_' is
 * only consulted for the protocol version. */
static void
set_switch_config(struct rconn *swconn_,
                  const struct ofputil_switch_config *config)
{
    queue_msg(ofputil_encode_set_config(config, rconn_get_version(swconn_)));
}
155
156 static void
157 set_actions_and_enqueue_msg(const struct dp_packet *packet,
158 const struct match *md,
159 struct ofpbuf *userdata)
160 {
161 /* Copy metadata from 'md' into the packet-out via "set_field"
162 * actions, then add actions from 'userdata'.
163 */
164 uint64_t ofpacts_stub[4096 / 8];
165 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
166 enum ofp_version version = rconn_get_version(swconn);
167
168 reload_metadata(&ofpacts, md);
169 enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
170 version, NULL, NULL,
171 &ofpacts);
172 if (error) {
173 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
174 VLOG_WARN_RL(&rl, "failed to parse actions from userdata (%s)",
175 ofperr_to_string(error));
176 ofpbuf_uninit(&ofpacts);
177 return;
178 }
179
180 struct ofputil_packet_out po = {
181 .packet = dp_packet_data(packet),
182 .packet_len = dp_packet_size(packet),
183 .buffer_id = UINT32_MAX,
184 .ofpacts = ofpacts.data,
185 .ofpacts_len = ofpacts.size,
186 };
187 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
188 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
189 queue_msg(ofputil_encode_packet_out(&po, proto));
190 ofpbuf_uninit(&ofpacts);
191 }
192
193 static void
194 pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md,
195 struct ofpbuf *userdata)
196 {
197 /* This action only works for IP packets, and the switch should only send
198 * us IP packets this way, but check here just to be sure. */
199 if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
200 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
201 VLOG_WARN_RL(&rl, "ARP action on non-IP packet (Ethertype %"PRIx16")",
202 ntohs(ip_flow->dl_type));
203 return;
204 }
205
206 /* Compose an ARP packet. */
207 uint64_t packet_stub[128 / 8];
208 struct dp_packet packet;
209 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
210 compose_arp__(&packet);
211
212 struct eth_header *eth = dp_packet_eth(&packet);
213 eth->eth_dst = ip_flow->dl_dst;
214 eth->eth_src = ip_flow->dl_src;
215
216 struct arp_eth_header *arp = dp_packet_l3(&packet);
217 arp->ar_op = htons(ARP_OP_REQUEST);
218 arp->ar_sha = ip_flow->dl_src;
219 put_16aligned_be32(&arp->ar_spa, ip_flow->nw_src);
220 arp->ar_tha = eth_addr_zero;
221 put_16aligned_be32(&arp->ar_tpa, ip_flow->nw_dst);
222
223 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
224 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
225 ip_flow->vlans[0].tci);
226 }
227
228 set_actions_and_enqueue_msg(&packet, md, userdata);
229 dp_packet_uninit(&packet);
230 }
231
232 static void
233 pinctrl_handle_icmp(const struct flow *ip_flow, struct dp_packet *pkt_in,
234 const struct match *md, struct ofpbuf *userdata)
235 {
236 /* This action only works for IP packets, and the switch should only send
237 * us IP packets this way, but check here just to be sure. */
238 if (ip_flow->dl_type != htons(ETH_TYPE_IP) &&
239 ip_flow->dl_type != htons(ETH_TYPE_IPV6)) {
240 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
241 VLOG_WARN_RL(&rl,
242 "ICMP action on non-IP packet (eth_type 0x%"PRIx16")",
243 ntohs(ip_flow->dl_type));
244 return;
245 }
246
247 uint64_t packet_stub[128 / 8];
248 struct dp_packet packet;
249
250 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
251 dp_packet_clear(&packet);
252 packet.packet_type = htonl(PT_ETH);
253
254 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
255 eh->eth_dst = ip_flow->dl_dst;
256 eh->eth_src = ip_flow->dl_src;
257
258 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IP)) {
259 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
260
261 eh->eth_type = htons(ETH_TYPE_IP);
262 dp_packet_set_l3(&packet, nh);
263 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
264 nh->ip_tot_len = htons(sizeof(struct ip_header) +
265 sizeof(struct icmp_header));
266 nh->ip_proto = IPPROTO_ICMP;
267 nh->ip_frag_off = htons(IP_DF);
268 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
269 ip_flow->nw_tos, 255);
270
271 struct icmp_header *ih = dp_packet_put_zeros(&packet, sizeof *ih);
272 dp_packet_set_l4(&packet, ih);
273 packet_set_icmp(&packet, ICMP4_DST_UNREACH, 1);
274 } else {
275 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
276 struct icmp6_error_header *ih;
277 uint32_t icmpv6_csum;
278
279 eh->eth_type = htons(ETH_TYPE_IPV6);
280 dp_packet_set_l3(&packet, nh);
281 nh->ip6_vfc = 0x60;
282 nh->ip6_nxt = IPPROTO_ICMPV6;
283 nh->ip6_plen = htons(sizeof(*nh) + ICMP6_ERROR_HEADER_LEN);
284 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
285 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
286
287 ih = dp_packet_put_zeros(&packet, sizeof *ih);
288 dp_packet_set_l4(&packet, ih);
289 ih->icmp6_base.icmp6_type = ICMP6_DST_UNREACH;
290 ih->icmp6_base.icmp6_code = 1;
291 ih->icmp6_base.icmp6_cksum = 0;
292
293 uint8_t *data = dp_packet_put_zeros(&packet, sizeof *nh);
294 memcpy(data, dp_packet_l3(pkt_in), sizeof(*nh));
295
296 icmpv6_csum = packet_csum_pseudoheader6(dp_packet_l3(&packet));
297 ih->icmp6_base.icmp6_cksum = csum_finish(
298 csum_continue(icmpv6_csum, ih,
299 sizeof(*nh) + ICMP6_ERROR_HEADER_LEN));
300 }
301
302 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
303 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
304 ip_flow->vlans[0].tci);
305 }
306
307 set_actions_and_enqueue_msg(&packet, md, userdata);
308 dp_packet_uninit(&packet);
309 }
310
311 static void
312 pinctrl_handle_tcp_reset(const struct flow *ip_flow, struct dp_packet *pkt_in,
313 const struct match *md, struct ofpbuf *userdata)
314 {
315 /* This action only works for TCP segments, and the switch should only send
316 * us TCP segments this way, but check here just to be sure. */
317 if (ip_flow->nw_proto != IPPROTO_TCP) {
318 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
319 VLOG_WARN_RL(&rl, "TCP_RESET action on non-TCP packet");
320 return;
321 }
322
323 uint64_t packet_stub[128 / 8];
324 struct dp_packet packet;
325
326 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
327 dp_packet_clear(&packet);
328 packet.packet_type = htonl(PT_ETH);
329
330 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
331 eh->eth_dst = ip_flow->dl_dst;
332 eh->eth_src = ip_flow->dl_src;
333
334 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IPV6)) {
335 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
336
337 eh->eth_type = htons(ETH_TYPE_IPV6);
338 dp_packet_set_l3(&packet, nh);
339 nh->ip6_vfc = 0x60;
340 nh->ip6_nxt = IPPROTO_TCP;
341 nh->ip6_plen = htons(TCP_HEADER_LEN);
342 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
343 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
344 } else {
345 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
346
347 eh->eth_type = htons(ETH_TYPE_IP);
348 dp_packet_set_l3(&packet, nh);
349 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
350 nh->ip_tot_len = htons(IP_HEADER_LEN + TCP_HEADER_LEN);
351 nh->ip_proto = IPPROTO_TCP;
352 nh->ip_frag_off = htons(IP_DF);
353 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
354 ip_flow->nw_tos, 255);
355 }
356
357 struct tcp_header *th = dp_packet_put_zeros(&packet, sizeof *th);
358 struct tcp_header *tcp_in = dp_packet_l4(pkt_in);
359 dp_packet_set_l4(&packet, th);
360 th->tcp_ctl = TCP_CTL(TCP_RST, 5);
361 if (ip_flow->tcp_flags & htons(TCP_ACK)) {
362 th->tcp_seq = tcp_in->tcp_ack;
363 } else {
364 uint32_t tcp_seq, ack_seq, tcp_len;
365
366 tcp_seq = ntohl(get_16aligned_be32(&tcp_in->tcp_seq));
367 tcp_len = TCP_OFFSET(tcp_in->tcp_ctl) * 4;
368 ack_seq = tcp_seq + dp_packet_l4_size(pkt_in) - tcp_len;
369 put_16aligned_be32(&th->tcp_ack, htonl(ack_seq));
370 put_16aligned_be32(&th->tcp_seq, 0);
371 }
372 packet_set_tcp_port(&packet, ip_flow->tp_dst, ip_flow->tp_src);
373
374 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
375 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
376 ip_flow->vlans[0].tci);
377 }
378
379 set_actions_and_enqueue_msg(&packet, md, userdata);
380 dp_packet_uninit(&packet);
381 }
382
383 static void
384 pinctrl_handle_put_dhcp_opts(
385 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
386 struct ofpbuf *userdata, struct ofpbuf *continuation)
387 {
388 enum ofp_version version = rconn_get_version(swconn);
389 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
390 struct dp_packet *pkt_out_ptr = NULL;
391 uint32_t success = 0;
392
393 /* Parse result field. */
394 const struct mf_field *f;
395 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
396 if (ofperr) {
397 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
398 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
399 goto exit;
400 }
401
402 /* Parse result offset and offer IP. */
403 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
404 ovs_be32 *offer_ip = ofpbuf_try_pull(userdata, sizeof *offer_ip);
405 if (!ofsp || !offer_ip) {
406 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
407 VLOG_WARN_RL(&rl, "offset or offer_ip not present in the userdata");
408 goto exit;
409 }
410
411 /* Check that the result is valid and writable. */
412 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
413 ofperr = mf_check_dst(&dst, NULL);
414 if (ofperr) {
415 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
416 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
417 goto exit;
418 }
419
420 if (!userdata->size) {
421 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
422 VLOG_WARN_RL(&rl, "DHCP options not present in the userdata");
423 goto exit;
424 }
425
426 /* Validate the DHCP request packet.
427 * Format of the DHCP packet is
428 * ------------------------------------------------------------------------
429 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
430 * ------------------------------------------------------------------------
431 */
432 if (dp_packet_l4_size(pkt_in) < (UDP_HEADER_LEN +
433 sizeof (struct dhcp_header) + sizeof(uint32_t) + 3)) {
434 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
435 VLOG_WARN_RL(&rl, "Invalid or incomplete DHCP packet recieved");
436 goto exit;
437 }
438
439 struct dhcp_header const *in_dhcp_data = dp_packet_get_udp_payload(pkt_in);
440 if (in_dhcp_data->op != DHCP_OP_REQUEST) {
441 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
442 VLOG_WARN_RL(&rl, "Invalid opcode in the DHCP packet : %d",
443 in_dhcp_data->op);
444 goto exit;
445 }
446
447 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
448 * options is the DHCP magic cookie followed by the actual DHCP options.
449 */
450 const uint8_t *in_dhcp_opt =
451 (const uint8_t *)dp_packet_get_udp_payload(pkt_in) +
452 sizeof (struct dhcp_header);
453
454 ovs_be32 magic_cookie = htonl(DHCP_MAGIC_COOKIE);
455 if (memcmp(in_dhcp_opt, &magic_cookie, sizeof(ovs_be32))) {
456 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
457 VLOG_WARN_RL(&rl, "DHCP magic cookie not present in the DHCP packet");
458 goto exit;
459 }
460
461 in_dhcp_opt += 4;
462 /* Check that the DHCP Message Type (opt 53) is present or not with
463 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST as the first
464 * DHCP option.
465 */
466 if (!(in_dhcp_opt[0] == DHCP_OPT_MSG_TYPE && in_dhcp_opt[1] == 1 && (
467 in_dhcp_opt[2] == DHCP_MSG_DISCOVER ||
468 in_dhcp_opt[2] == DHCP_MSG_REQUEST))) {
469 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
470 VLOG_WARN_RL(&rl, "Invalid DHCP message type : opt code = %d,"
471 " opt value = %d", in_dhcp_opt[0], in_dhcp_opt[2]);
472 goto exit;
473 }
474
475 uint8_t msg_type;
476 if (in_dhcp_opt[2] == DHCP_MSG_DISCOVER) {
477 msg_type = DHCP_MSG_OFFER;
478 } else {
479 msg_type = DHCP_MSG_ACK;
480 }
481
482 /* Frame the DHCP reply packet
483 * Total DHCP options length will be options stored in the userdata +
484 * 16 bytes.
485 *
486 * --------------------------------------------------------------
487 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
488 * --------------------------------------------------------------
489 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
490 * --------------------------------------------------------------
491 */
492 uint16_t new_l4_size = UDP_HEADER_LEN + DHCP_HEADER_LEN + \
493 userdata->size + 16;
494 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
495
496 struct dp_packet pkt_out;
497 dp_packet_init(&pkt_out, new_packet_size);
498 dp_packet_clear(&pkt_out);
499 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
500 pkt_out_ptr = &pkt_out;
501
502 /* Copy the L2 and L3 headers from the pkt_in as they would remain same*/
503 dp_packet_put(
504 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
505
506 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
507 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
508 pkt_out.l3_ofs = pkt_in->l3_ofs;
509 pkt_out.l4_ofs = pkt_in->l4_ofs;
510
511 struct udp_header *udp = dp_packet_put(
512 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
513
514 struct dhcp_header *dhcp_data = dp_packet_put(
515 &pkt_out, dp_packet_pull(pkt_in, DHCP_HEADER_LEN), DHCP_HEADER_LEN);
516 dhcp_data->op = DHCP_OP_REPLY;
517 dhcp_data->yiaddr = *offer_ip;
518 dp_packet_put(&pkt_out, &magic_cookie, sizeof(ovs_be32));
519
520 uint8_t *out_dhcp_opts = dp_packet_put_zeros(&pkt_out,
521 userdata->size + 12);
522 /* DHCP option - type */
523 out_dhcp_opts[0] = DHCP_OPT_MSG_TYPE;
524 out_dhcp_opts[1] = 1;
525 out_dhcp_opts[2] = msg_type;
526 out_dhcp_opts += 3;
527
528 memcpy(out_dhcp_opts, userdata->data, userdata->size);
529 out_dhcp_opts += userdata->size;
530 /* Padding */
531 out_dhcp_opts += 4;
532 /* End */
533 out_dhcp_opts[0] = DHCP_OPT_END;
534
535 udp->udp_len = htons(new_l4_size);
536
537 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
538 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs + new_l4_size);
539 udp->udp_csum = 0;
540 /* Checksum needs to be initialized to zero. */
541 out_ip->ip_csum = 0;
542 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
543
544 pin->packet = dp_packet_data(&pkt_out);
545 pin->packet_len = dp_packet_size(&pkt_out);
546
547 /* Log the response. */
548 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 40);
549 const struct eth_header *l2 = dp_packet_eth(&pkt_out);
550 VLOG_INFO_RL(&rl, "DHCP%s "ETH_ADDR_FMT" "IP_FMT"",
551 msg_type == DHCP_MSG_OFFER ? "OFFER" : "ACK",
552 ETH_ADDR_ARGS(l2->eth_src), IP_ARGS(*offer_ip));
553
554 success = 1;
555 exit:
556 if (!ofperr) {
557 union mf_subvalue sv;
558 sv.u8_val = success;
559 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
560 }
561 queue_msg(ofputil_encode_resume(pin, continuation, proto));
562 if (pkt_out_ptr) {
563 dp_packet_uninit(pkt_out_ptr);
564 }
565 }
566
567 static bool
568 compose_out_dhcpv6_opts(struct ofpbuf *userdata,
569 struct ofpbuf *out_dhcpv6_opts, ovs_be32 iaid)
570 {
571 while (userdata->size) {
572 struct dhcp_opt6_header *userdata_opt = ofpbuf_try_pull(
573 userdata, sizeof *userdata_opt);
574 if (!userdata_opt) {
575 return false;
576 }
577
578 size_t size = ntohs(userdata_opt->size);
579 uint8_t *userdata_opt_data = ofpbuf_try_pull(userdata, size);
580 if (!userdata_opt_data) {
581 return false;
582 }
583
584 switch (ntohs(userdata_opt->opt_code)) {
585 case DHCPV6_OPT_SERVER_ID_CODE:
586 {
587 /* The Server Identifier option carries a DUID
588 * identifying a server between a client and a server.
589 * See RFC 3315 Sec 9 and Sec 22.3.
590 *
591 * We use DUID Based on Link-layer Address [DUID-LL].
592 */
593
594 struct dhcpv6_opt_server_id *opt_server_id = ofpbuf_put_zeros(
595 out_dhcpv6_opts, sizeof *opt_server_id);
596
597 opt_server_id->opt.code = htons(DHCPV6_OPT_SERVER_ID_CODE);
598 opt_server_id->opt.len = htons(size + 4);
599 opt_server_id->duid_type = htons(DHCPV6_DUID_LL);
600 opt_server_id->hw_type = htons(DHCPV6_HW_TYPE_ETH);
601 memcpy(&opt_server_id->mac, userdata_opt_data,
602 sizeof(struct eth_addr));
603 break;
604 }
605
606 case DHCPV6_OPT_IA_ADDR_CODE:
607 {
608 if (size != sizeof(struct in6_addr)) {
609 return false;
610 }
611
612 /* IA Address option is used to specify IPv6 addresses associated
613 * with an IA_NA or IA_TA. The IA Address option must be
614 * encapsulated in the Options field of an IA_NA or IA_TA option.
615 *
616 * We will encapsulate the IA Address within the IA_NA option.
617 * Please see RFC 3315 section 22.5 and 22.6
618 */
619 struct dhcpv6_opt_ia_na *opt_ia_na = ofpbuf_put_zeros(
620 out_dhcpv6_opts, sizeof *opt_ia_na);
621 opt_ia_na->opt.code = htons(DHCPV6_OPT_IA_NA_CODE);
622 /* IA_NA length (in bytes)-
623 * IAID - 4
624 * T1 - 4
625 * T2 - 4
626 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
627 */
628 opt_ia_na->opt.len = htons(12 + sizeof(struct dhcpv6_opt_ia_addr));
629 opt_ia_na->iaid = iaid;
630 /* Set the lifetime of the address(es) to infinity */
631 opt_ia_na->t1 = OVS_BE32_MAX;
632 opt_ia_na->t2 = OVS_BE32_MAX;
633
634 struct dhcpv6_opt_ia_addr *opt_ia_addr = ofpbuf_put_zeros(
635 out_dhcpv6_opts, sizeof *opt_ia_addr);
636 opt_ia_addr->opt.code = htons(DHCPV6_OPT_IA_ADDR_CODE);
637 opt_ia_addr->opt.len = htons(size + 8);
638 memcpy(opt_ia_addr->ipv6.s6_addr, userdata_opt_data, size);
639 opt_ia_addr->t1 = OVS_BE32_MAX;
640 opt_ia_addr->t2 = OVS_BE32_MAX;
641 break;
642 }
643
644 case DHCPV6_OPT_DNS_SERVER_CODE:
645 {
646 struct dhcpv6_opt_header *opt_dns = ofpbuf_put_zeros(
647 out_dhcpv6_opts, sizeof *opt_dns);
648 opt_dns->code = htons(DHCPV6_OPT_DNS_SERVER_CODE);
649 opt_dns->len = htons(size);
650 ofpbuf_put(out_dhcpv6_opts, userdata_opt_data, size);
651 break;
652 }
653
654 case DHCPV6_OPT_DOMAIN_SEARCH_CODE:
655 {
656 struct dhcpv6_opt_header *opt_dsl = ofpbuf_put_zeros(
657 out_dhcpv6_opts, sizeof *opt_dsl);
658 opt_dsl->code = htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE);
659 opt_dsl->len = htons(size + 2);
660 uint8_t *data = ofpbuf_put_zeros(out_dhcpv6_opts, size + 2);
661 *data = size;
662 memcpy(data + 1, userdata_opt_data, size);
663 break;
664 }
665
666 default:
667 return false;
668 }
669 }
670 return true;
671 }
672
673 static void
674 pinctrl_handle_put_dhcpv6_opts(
675 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
676 struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
677 {
678 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
679 enum ofp_version version = rconn_get_version(swconn);
680 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
681 struct dp_packet *pkt_out_ptr = NULL;
682 uint32_t success = 0;
683
684 /* Parse result field. */
685 const struct mf_field *f;
686 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
687 if (ofperr) {
688 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
689 goto exit;
690 }
691
692 /* Parse result offset. */
693 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
694 if (!ofsp) {
695 VLOG_WARN_RL(&rl, "offset not present in the userdata");
696 goto exit;
697 }
698
699 /* Check that the result is valid and writable. */
700 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
701 ofperr = mf_check_dst(&dst, NULL);
702 if (ofperr) {
703 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
704 goto exit;
705 }
706
707 if (!userdata->size) {
708 VLOG_WARN_RL(&rl, "DHCPv6 options not present in the userdata");
709 goto exit;
710 }
711
712 struct udp_header *in_udp = dp_packet_l4(pkt_in);
713 const uint8_t *in_dhcpv6_data = dp_packet_get_udp_payload(pkt_in);
714 if (!in_udp || !in_dhcpv6_data) {
715 VLOG_WARN_RL(&rl, "truncated dhcpv6 packet");
716 goto exit;
717 }
718
719 uint8_t out_dhcpv6_msg_type;
720 switch(*in_dhcpv6_data) {
721 case DHCPV6_MSG_TYPE_SOLICIT:
722 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_ADVT;
723 break;
724
725 case DHCPV6_MSG_TYPE_REQUEST:
726 case DHCPV6_MSG_TYPE_CONFIRM:
727 case DHCPV6_MSG_TYPE_DECLINE:
728 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_REPLY;
729 break;
730
731 default:
732 /* Invalid or unsupported DHCPv6 message type */
733 goto exit;
734 }
735
736 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes). */
737 in_dhcpv6_data += 4;
738 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
739 * solicit/request/confirm packet and copy the same IAID in the Server's
740 * response. */
741 ovs_be32 iaid = 0;
742 struct dhcpv6_opt_header const *in_opt_client_id = NULL;
743 size_t udp_len = ntohs(in_udp->udp_len);
744 size_t l4_len = dp_packet_l4_size(pkt_in);
745 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
746 while (in_dhcpv6_data < end) {
747 struct dhcpv6_opt_header const *in_opt =
748 (struct dhcpv6_opt_header *)in_dhcpv6_data;
749 switch(ntohs(in_opt->code)) {
750 case DHCPV6_OPT_IA_NA_CODE:
751 {
752 struct dhcpv6_opt_ia_na *opt_ia_na = (
753 struct dhcpv6_opt_ia_na *)in_opt;
754 iaid = opt_ia_na->iaid;
755 break;
756 }
757
758 case DHCPV6_OPT_CLIENT_ID_CODE:
759 in_opt_client_id = in_opt;
760 break;
761
762 default:
763 break;
764 }
765 in_dhcpv6_data += sizeof *in_opt + ntohs(in_opt->len);
766 }
767
768 if (!in_opt_client_id) {
769 VLOG_WARN_RL(&rl, "DHCPv6 option - Client id not present in the "
770 " DHCPv6 packet");
771 goto exit;
772 }
773
774 if (!iaid) {
775 VLOG_WARN_RL(&rl, "DHCPv6 option - IA NA not present in the "
776 " DHCPv6 packet");
777 goto exit;
778 }
779
780 uint64_t out_ofpacts_dhcpv6_opts_stub[256 / 8];
781 struct ofpbuf out_dhcpv6_opts =
782 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub);
783
784 if (!compose_out_dhcpv6_opts(userdata, &out_dhcpv6_opts, iaid)) {
785 VLOG_WARN_RL(&rl, "Invalid userdata");
786 goto exit;
787 }
788
789 uint16_t new_l4_size
790 = (UDP_HEADER_LEN + 4 + sizeof *in_opt_client_id +
791 ntohs(in_opt_client_id->len) + out_dhcpv6_opts.size);
792 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
793
794 struct dp_packet pkt_out;
795 dp_packet_init(&pkt_out, new_packet_size);
796 dp_packet_clear(&pkt_out);
797 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
798 pkt_out_ptr = &pkt_out;
799
800 /* Copy L2 and L3 headers from pkt_in. */
801 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
802 pkt_in->l4_ofs);
803
804 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
805 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
806 pkt_out.l3_ofs = pkt_in->l3_ofs;
807 pkt_out.l4_ofs = pkt_in->l4_ofs;
808
809 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
810 * Need to preserve the transaction id in the DHCPv6 reply packet. */
811 struct udp_header *out_udp = dp_packet_put(
812 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
813 uint8_t *out_dhcpv6 = dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, 4), 4);
814
815 /* Set the proper DHCPv6 message type. */
816 *out_dhcpv6 = out_dhcpv6_msg_type;
817
818 /* Copy the Client Identifier. */
819 dp_packet_put(&pkt_out, in_opt_client_id,
820 sizeof *in_opt_client_id + ntohs(in_opt_client_id->len));
821
822 /* Copy the DHCPv6 Options. */
823 dp_packet_put(&pkt_out, out_dhcpv6_opts.data, out_dhcpv6_opts.size);
824 out_udp->udp_len = htons(new_l4_size);
825 out_udp->udp_csum = 0;
826
827 struct ovs_16aligned_ip6_hdr *out_ip6 = dp_packet_l3(&pkt_out);
828 out_ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = out_udp->udp_len;
829
830 uint32_t csum;
831 csum = packet_csum_pseudoheader6(dp_packet_l3(&pkt_out));
832 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
833 ((const unsigned char *)out_udp -
834 (const unsigned char *)dp_packet_eth(&pkt_out)));
835 out_udp->udp_csum = csum_finish(csum);
836 if (!out_udp->udp_csum) {
837 out_udp->udp_csum = htons(0xffff);
838 }
839
840 pin->packet = dp_packet_data(&pkt_out);
841 pin->packet_len = dp_packet_size(&pkt_out);
842 ofpbuf_uninit(&out_dhcpv6_opts);
843 success = 1;
844 exit:
845 if (!ofperr) {
846 union mf_subvalue sv;
847 sv.u8_val = success;
848 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
849 }
850 queue_msg(ofputil_encode_resume(pin, continuation, proto));
851 dp_packet_uninit(pkt_out_ptr);
852 }
853
854 static void
855 put_be16(struct ofpbuf *buf, ovs_be16 x)
856 {
857 ofpbuf_put(buf, &x, sizeof x);
858 }
859
860 static void
861 put_be32(struct ofpbuf *buf, ovs_be32 x)
862 {
863 ofpbuf_put(buf, &x, sizeof x);
864 }
865
866 static void
867 pinctrl_handle_dns_lookup(
868 const struct sbrec_dns_table *dns_table,
869 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
870 struct ofpbuf *userdata, struct ofpbuf *continuation)
871 {
872 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
873 enum ofp_version version = rconn_get_version(swconn);
874 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
875 struct dp_packet *pkt_out_ptr = NULL;
876 uint32_t success = 0;
877
878 /* Parse result field. */
879 const struct mf_field *f;
880 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
881 if (ofperr) {
882 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
883 goto exit;
884 }
885
886 /* Parse result offset. */
887 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
888 if (!ofsp) {
889 VLOG_WARN_RL(&rl, "offset not present in the userdata");
890 goto exit;
891 }
892
893 /* Check that the result is valid and writable. */
894 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
895 ofperr = mf_check_dst(&dst, NULL);
896 if (ofperr) {
897 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
898 goto exit;
899 }
900
901 /* Extract the DNS header */
902 struct dns_header const *in_dns_header = dp_packet_get_udp_payload(pkt_in);
903 if (!in_dns_header) {
904 VLOG_WARN_RL(&rl, "truncated dns packet");
905 goto exit;
906 }
907
908 /* Check if it is DNS request or not */
909 if (in_dns_header->lo_flag & 0x80) {
910 /* It's a DNS response packet which we are not interested in */
911 goto exit;
912 }
913
914 /* Check if at least one query request is present */
915 if (!in_dns_header->qdcount) {
916 goto exit;
917 }
918
919 struct udp_header *in_udp = dp_packet_l4(pkt_in);
920 size_t udp_len = ntohs(in_udp->udp_len);
921 size_t l4_len = dp_packet_l4_size(pkt_in);
922 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
923 uint8_t *in_dns_data = (uint8_t *)(in_dns_header + 1);
924 uint8_t *in_queryname = in_dns_data;
925 uint8_t idx = 0;
926 struct ds query_name;
927 ds_init(&query_name);
928 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
929 * encoded as (in hex) - 03 77 77 77 03 6f 76 63 03 6f 72 67 00.
930 */
931 while ((in_dns_data + idx) < end && in_dns_data[idx]) {
932 uint8_t label_len = in_dns_data[idx++];
933 if (in_dns_data + idx + label_len > end) {
934 ds_destroy(&query_name);
935 goto exit;
936 }
937 ds_put_buffer(&query_name, (const char *) in_dns_data + idx, label_len);
938 idx += label_len;
939 ds_put_char(&query_name, '.');
940 }
941
942 idx++;
943 ds_chomp(&query_name, '.');
944 in_dns_data += idx;
945
946 /* Query should have TYPE and CLASS fields */
947 if (in_dns_data + (2 * sizeof(ovs_be16)) > end) {
948 ds_destroy(&query_name);
949 goto exit;
950 }
951
952 uint16_t query_type = ntohs(*ALIGNED_CAST(const ovs_be16 *, in_dns_data));
953 /* Supported query types - A, AAAA and ANY */
954 if (!(query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_AAAA
955 || query_type == DNS_QUERY_TYPE_ANY)) {
956 ds_destroy(&query_name);
957 goto exit;
958 }
959
960 uint64_t dp_key = ntohll(pin->flow_metadata.flow.metadata);
961 const struct sbrec_dns *sbrec_dns;
962 const char *answer_ips = NULL;
963 SBREC_DNS_TABLE_FOR_EACH (sbrec_dns, dns_table) {
964 for (size_t i = 0; i < sbrec_dns->n_datapaths; i++) {
965 if (sbrec_dns->datapaths[i]->tunnel_key == dp_key) {
966 answer_ips = smap_get(&sbrec_dns->records,
967 ds_cstr(&query_name));
968 if (answer_ips) {
969 break;
970 }
971 }
972 }
973
974 if (answer_ips) {
975 break;
976 }
977 }
978
979 ds_destroy(&query_name);
980 if (!answer_ips) {
981 goto exit;
982 }
983
984 struct lport_addresses ip_addrs;
985 if (!extract_ip_addresses(answer_ips, &ip_addrs)) {
986 goto exit;
987 }
988
989 uint16_t ancount = 0;
990 uint64_t dns_ans_stub[128 / 8];
991 struct ofpbuf dns_answer = OFPBUF_STUB_INITIALIZER(dns_ans_stub);
992
993 if (query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_ANY) {
994 for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
995 /* Copy the answer section */
996 /* Format of the answer section is
997 * - NAME -> The domain name
998 * - TYPE -> 2 octets containing one of the RR type codes
999 * - CLASS -> 2 octets which specify the class of the data
1000 * in the RDATA field.
1001 * - TTL -> 32 bit unsigned int specifying the time
1002 * interval (in secs) that the resource record
1003 * may be cached before it should be discarded.
1004 * - RDLENGTH -> 16 bit integer specifying the length of the
1005 * RDATA field.
1006 * - RDATA -> a variable length string of octets that
1007 * describes the resource. In our case it will
1008 * be IP address of the domain name.
1009 */
1010 ofpbuf_put(&dns_answer, in_queryname, idx);
1011 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_A));
1012 put_be16(&dns_answer, htons(DNS_CLASS_IN));
1013 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
1014 put_be16(&dns_answer, htons(sizeof(ovs_be32)));
1015 put_be32(&dns_answer, ip_addrs.ipv4_addrs[i].addr);
1016 ancount++;
1017 }
1018 }
1019
1020 if (query_type == DNS_QUERY_TYPE_AAAA ||
1021 query_type == DNS_QUERY_TYPE_ANY) {
1022 for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
1023 ofpbuf_put(&dns_answer, in_queryname, idx);
1024 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_AAAA));
1025 put_be16(&dns_answer, htons(DNS_CLASS_IN));
1026 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
1027 const struct in6_addr *ip6 = &ip_addrs.ipv6_addrs[i].addr;
1028 put_be16(&dns_answer, htons(sizeof *ip6));
1029 ofpbuf_put(&dns_answer, ip6, sizeof *ip6);
1030 ancount++;
1031 }
1032 }
1033
1034 destroy_lport_addresses(&ip_addrs);
1035
1036 if (!ancount) {
1037 ofpbuf_uninit(&dns_answer);
1038 goto exit;
1039 }
1040
1041 uint16_t new_l4_size = ntohs(in_udp->udp_len) + dns_answer.size;
1042 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
1043 struct dp_packet pkt_out;
1044 dp_packet_init(&pkt_out, new_packet_size);
1045 dp_packet_clear(&pkt_out);
1046 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
1047 pkt_out_ptr = &pkt_out;
1048
1049 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1050 dp_packet_put(
1051 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
1052
1053 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
1054 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
1055 pkt_out.l3_ofs = pkt_in->l3_ofs;
1056 pkt_out.l4_ofs = pkt_in->l4_ofs;
1057
1058 struct udp_header *out_udp = dp_packet_put(
1059 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
1060
1061 /* Copy the DNS header. */
1062 struct dns_header *out_dns_header = dp_packet_put(
1063 &pkt_out, dp_packet_pull(pkt_in, sizeof *out_dns_header),
1064 sizeof *out_dns_header);
1065
1066 /* Set the response bit to 1 in the flags. */
1067 out_dns_header->lo_flag |= 0x80;
1068
1069 /* Set the answer RR. */
1070 out_dns_header->ancount = htons(ancount);
1071
1072 /* Copy the Query section. */
1073 dp_packet_put(&pkt_out, dp_packet_data(pkt_in), dp_packet_size(pkt_in));
1074
1075 /* Copy the answer sections. */
1076 dp_packet_put(&pkt_out, dns_answer.data, dns_answer.size);
1077 ofpbuf_uninit(&dns_answer);
1078
1079 out_udp->udp_len = htons(new_l4_size);
1080 out_udp->udp_csum = 0;
1081
1082 struct eth_header *eth = dp_packet_data(&pkt_out);
1083 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1084 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
1085 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs
1086 + new_l4_size);
1087 /* Checksum needs to be initialized to zero. */
1088 out_ip->ip_csum = 0;
1089 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
1090 } else {
1091 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
1092 nh->ip6_plen = htons(new_l4_size);
1093
1094 /* IPv6 needs UDP checksum calculated */
1095 uint32_t csum;
1096 csum = packet_csum_pseudoheader6(nh);
1097 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
1098 ((const unsigned char *)out_udp -
1099 (const unsigned char *)eth));
1100 out_udp->udp_csum = csum_finish(csum);
1101 if (!out_udp->udp_csum) {
1102 out_udp->udp_csum = htons(0xffff);
1103 }
1104 }
1105
1106 pin->packet = dp_packet_data(&pkt_out);
1107 pin->packet_len = dp_packet_size(&pkt_out);
1108
1109 success = 1;
1110 exit:
1111 if (!ofperr) {
1112 union mf_subvalue sv;
1113 sv.u8_val = success;
1114 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
1115 }
1116 queue_msg(ofputil_encode_resume(pin, continuation, proto));
1117 dp_packet_uninit(pkt_out_ptr);
1118 }
1119
1120 static void
1121 process_packet_in(const struct ofp_header *msg,
1122 const struct sbrec_dns_table *dns_table)
1123 {
1124 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1125
1126 struct ofputil_packet_in pin;
1127 struct ofpbuf continuation;
1128 enum ofperr error = ofputil_decode_packet_in(msg, true, NULL, NULL, &pin,
1129 NULL, NULL, &continuation);
1130
1131 if (error) {
1132 VLOG_WARN_RL(&rl, "error decoding packet-in: %s",
1133 ofperr_to_string(error));
1134 return;
1135 }
1136 if (pin.reason != OFPR_ACTION) {
1137 return;
1138 }
1139
1140 struct ofpbuf userdata = ofpbuf_const_initializer(pin.userdata,
1141 pin.userdata_len);
1142 const struct action_header *ah = ofpbuf_pull(&userdata, sizeof *ah);
1143 if (!ah) {
1144 VLOG_WARN_RL(&rl, "packet-in userdata lacks action header");
1145 return;
1146 }
1147
1148 struct dp_packet packet;
1149 dp_packet_use_const(&packet, pin.packet, pin.packet_len);
1150 struct flow headers;
1151 flow_extract(&packet, &headers);
1152
1153 switch (ntohl(ah->opcode)) {
1154 case ACTION_OPCODE_ARP:
1155 pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata);
1156 break;
1157
1158 case ACTION_OPCODE_PUT_ARP:
1159 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1160 true);
1161 break;
1162
1163 case ACTION_OPCODE_PUT_DHCP_OPTS:
1164 pinctrl_handle_put_dhcp_opts(&packet, &pin, &userdata, &continuation);
1165 break;
1166
1167 case ACTION_OPCODE_ND_NA:
1168 pinctrl_handle_nd_na(&headers, &pin.flow_metadata, &userdata, false);
1169 break;
1170
1171 case ACTION_OPCODE_ND_NA_ROUTER:
1172 pinctrl_handle_nd_na(&headers, &pin.flow_metadata, &userdata, true);
1173 break;
1174
1175 case ACTION_OPCODE_PUT_ND:
1176 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1177 false);
1178 break;
1179
1180 case ACTION_OPCODE_PUT_DHCPV6_OPTS:
1181 pinctrl_handle_put_dhcpv6_opts(&packet, &pin, &userdata,
1182 &continuation);
1183 break;
1184
1185 case ACTION_OPCODE_DNS_LOOKUP:
1186 pinctrl_handle_dns_lookup(dns_table,
1187 &packet, &pin, &userdata, &continuation);
1188 break;
1189
1190 case ACTION_OPCODE_LOG:
1191 handle_acl_log(&headers, &userdata);
1192 break;
1193
1194 case ACTION_OPCODE_PUT_ND_RA_OPTS:
1195 pinctrl_handle_put_nd_ra_opts(&headers, &packet, &pin, &userdata,
1196 &continuation);
1197 break;
1198
1199 case ACTION_OPCODE_ND_NS:
1200 pinctrl_handle_nd_ns(&headers, &pin.flow_metadata, &userdata);
1201 break;
1202
1203 case ACTION_OPCODE_ICMP:
1204 pinctrl_handle_icmp(&headers, &packet, &pin.flow_metadata,
1205 &userdata);
1206 break;
1207
1208 case ACTION_OPCODE_TCP_RESET:
1209 pinctrl_handle_tcp_reset(&headers, &packet, &pin.flow_metadata,
1210 &userdata);
1211 break;
1212
1213 default:
1214 VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
1215 ntohl(ah->opcode));
1216 break;
1217 }
1218 }
1219
1220 static void
1221 pinctrl_recv(const struct sbrec_dns_table *dns_table,
1222 const struct ofp_header *oh, enum ofptype type)
1223 {
1224 if (type == OFPTYPE_ECHO_REQUEST) {
1225 queue_msg(ofputil_encode_echo_reply(oh));
1226 } else if (type == OFPTYPE_GET_CONFIG_REPLY) {
1227 /* Enable asynchronous messages */
1228 struct ofputil_switch_config config;
1229
1230 ofputil_decode_get_config_reply(oh, &config);
1231 config.miss_send_len = UINT16_MAX;
1232 set_switch_config(swconn, &config);
1233 } else if (type == OFPTYPE_PACKET_IN) {
1234 process_packet_in(oh, dns_table);
1235 } else {
1236 if (VLOG_IS_DBG_ENABLED()) {
1237 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
1238
1239 char *s = ofp_to_string(oh, ntohs(oh->length), NULL, NULL, 2);
1240
1241 VLOG_DBG_RL(&rl, "OpenFlow packet ignored: %s", s);
1242 free(s);
1243 }
1244 }
1245 }
1246
1247 void
1248 pinctrl_run(struct controller_ctx *ctx,
1249 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
1250 struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
1251 struct ovsdb_idl_index *sbrec_port_binding_by_key,
1252 struct ovsdb_idl_index *sbrec_port_binding_by_name,
1253 const struct sbrec_dns_table *dns_table,
1254 const struct sbrec_mac_binding_table *mac_binding_table,
1255 const struct ovsrec_bridge *br_int,
1256 const struct sbrec_chassis *chassis,
1257 const struct chassis_index *chassis_index,
1258 const struct hmap *local_datapaths,
1259 const struct sset *active_tunnels)
1260 {
1261 char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
1262 if (strcmp(target, rconn_get_target(swconn))) {
1263 VLOG_INFO("%s: connecting to switch", target);
1264 rconn_connect(swconn, target, target);
1265 }
1266 free(target);
1267
1268 rconn_run(swconn);
1269
1270 if (!rconn_is_connected(swconn)) {
1271 return;
1272 }
1273
1274 if (conn_seq_no != rconn_get_connection_seqno(swconn)) {
1275 pinctrl_setup();
1276 conn_seq_no = rconn_get_connection_seqno(swconn);
1277 flush_put_mac_bindings();
1278 }
1279
1280 /* Process a limited number of messages per call. */
1281 for (int i = 0; i < 50; i++) {
1282 struct ofpbuf *msg = rconn_recv(swconn);
1283 if (!msg) {
1284 break;
1285 }
1286
1287 const struct ofp_header *oh = msg->data;
1288 enum ofptype type;
1289
1290 ofptype_decode(&type, oh);
1291 pinctrl_recv(dns_table, oh, type);
1292 ofpbuf_delete(msg);
1293 }
1294
1295 run_put_mac_bindings(ctx, sbrec_datapath_binding_by_key,
1296 sbrec_port_binding_by_key, mac_binding_table);
1297 send_garp_run(sbrec_port_binding_by_datapath,
1298 sbrec_port_binding_by_name, br_int, chassis, chassis_index,
1299 local_datapaths, active_tunnels);
1300 send_ipv6_ras(sbrec_port_binding_by_datapath,
1301 sbrec_port_binding_by_name, local_datapaths);
1302 }
1303
/* Table of ipv6_ra_state structures, keyed on logical port name */
static struct shash ipv6_ras;

/* Earliest time at which any entry in 'ipv6_ras' is next due to send an RA,
 * in milliseconds on the time_msec() clock (the same unit as
 * 'next_announce' in struct ipv6_ra_state).  LLONG_MAX when nothing is
 * scheduled. */
static long long int send_ipv6_ra_time;
1309
/* Periodic router advertisement settings for one port, built from the
 * options of its Port_Binding by ipv6_ra_update_config(). */
struct ipv6_ra_config {
    time_t min_interval;    /* Shortest gap between RAs, in seconds. */
    time_t max_interval;    /* Longest gap between RAs, in seconds. */
    struct eth_addr eth_src;    /* From option "ipv6_ra_src_eth". */
    struct eth_addr eth_dst;    /* Always 33:33:00:00:00:01. */
    struct in6_addr ipv6_src;   /* From option "ipv6_ra_src_addr". */
    struct in6_addr ipv6_dst;   /* Always ff02::1. */
    int32_t mtu;            /* From option "ipv6_ra_mtu". */
    uint8_t mo_flags; /* Managed/Other flags for RAs */
    uint8_t la_flags; /* On-link/autonomous flags for address prefixes */
    struct lport_addresses prefixes;    /* From option "ipv6_ra_prefixes";
                                         * released with
                                         * destroy_lport_addresses(). */
};
1322
/* Per-port state for periodic router advertisements; the data stored in the
 * 'ipv6_ras' table. */
struct ipv6_ra_state {
    long long int next_announce;    /* When the next RA is due, in ms on the
                                     * time_msec() clock. */
    struct ipv6_ra_config *config;  /* Owned; freed by ipv6_ra_delete(). */
    int64_t port_key;               /* Tunnel key of the peer port that the
                                     * RA is injected into. */
    int64_t metadata;               /* Tunnel key of the peer's datapath. */
    bool delete_me;                 /* Set at the start of each
                                     * send_ipv6_ras() pass and cleared for
                                     * ports that are still configured;
                                     * entries left set are removed. */
};
1330
/* Initializes the 'ipv6_ras' table to empty and marks that no RA is
 * currently scheduled. */
static void
init_ipv6_ras(void)
{
    shash_init(&ipv6_ras);
    send_ipv6_ra_time = LLONG_MAX;
}
1337
1338 static void
1339 ipv6_ra_config_delete(struct ipv6_ra_config *config)
1340 {
1341 if (config) {
1342 destroy_lport_addresses(&config->prefixes);
1343 free(config);
1344 }
1345 }
1346
1347 static void
1348 ipv6_ra_delete(struct ipv6_ra_state *ra)
1349 {
1350 if (ra) {
1351 ipv6_ra_config_delete(ra->config);
1352 free(ra);
1353 }
1354 }
1355
1356 static void
1357 destroy_ipv6_ras(void)
1358 {
1359 struct shash_node *iter, *next;
1360 SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) {
1361 struct ipv6_ra_state *ra = iter->data;
1362 ipv6_ra_delete(ra);
1363 shash_delete(&ipv6_ras, iter);
1364 }
1365 shash_destroy(&ipv6_ras);
1366 }
1367
1368 static struct ipv6_ra_config *
1369 ipv6_ra_update_config(const struct sbrec_port_binding *pb)
1370 {
1371 struct ipv6_ra_config *config;
1372
1373 config = xzalloc(sizeof *config);
1374
1375 config->max_interval = smap_get_int(&pb->options, "ipv6_ra_max_interval",
1376 ND_RA_MAX_INTERVAL_DEFAULT);
1377 config->min_interval = smap_get_int(&pb->options, "ipv6_ra_min_interval",
1378 nd_ra_min_interval_default(config->max_interval));
1379 config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
1380 config->la_flags = ND_PREFIX_ON_LINK;
1381
1382 const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode");
1383 if (!address_mode) {
1384 VLOG_WARN("No address mode specified");
1385 goto fail;
1386 }
1387 if (!strcmp(address_mode, "dhcpv6_stateless")) {
1388 config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG;
1389 } else if (!strcmp(address_mode, "dhcpv6_stateful")) {
1390 config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG;
1391 } else if (!strcmp(address_mode, "slaac")) {
1392 config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS;
1393 } else {
1394 VLOG_WARN("Invalid address mode %s", address_mode);
1395 goto fail;
1396 }
1397
1398 const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes");
1399 if (prefixes && !extract_ip_addresses(prefixes, &config->prefixes)) {
1400 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes);
1401 goto fail;
1402 }
1403
1404 /* All nodes multicast addresses */
1405 config->eth_dst = (struct eth_addr) ETH_ADDR_C(33,33,00,00,00,01);
1406 ipv6_parse("ff02::1", &config->ipv6_dst);
1407
1408 const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth");
1409 if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src)) {
1410 VLOG_WARN("Invalid ethernet source %s", eth_addr);
1411 goto fail;
1412 }
1413 const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr");
1414 if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) {
1415 VLOG_WARN("Invalid IP source %s", ip_addr);
1416 goto fail;
1417 }
1418
1419 return config;
1420
1421 fail:
1422 ipv6_ra_config_delete(config);
1423 return NULL;
1424 }
1425
/* Returns the time, in milliseconds on the time_msec() clock, at which the
 * next RA should be sent: the current time plus a random delay of at least
 * 'min_interval' and less than 'max_interval' seconds.
 *
 * If 'max_interval' does not exceed 'min_interval' there is no range to
 * randomize over, and the delay is exactly 'min_interval' seconds. */
static long long int
ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
{
    long long int min_interval_ms = min_interval * 1000LL;
    long long int max_interval_ms = max_interval * 1000LL;
    long long int next = time_msec() + min_interval_ms;

    /* Only ask for a random offset when the window is non-empty.  The
     * intervals come straight from database options, so equal or inverted
     * values are possible here.
     * NOTE(review): random_range() is believed to reduce modulo its
     * argument, so a zero width would divide by zero -- confirm against
     * lib/random.h. */
    if (max_interval_ms > min_interval_ms) {
        next += random_range(max_interval_ms - min_interval_ms);
    }

    return next;
}
1435
1436 static void
1437 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
1438 struct ofpbuf *ofpacts)
1439 {
1440 struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
1441 mf_from_id(dst), NULL,
1442 NULL);
1443 ovs_be64 n_value = htonll(value);
1444 bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
1445 bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
1446 }
1447
1448 static long long int
1449 ipv6_ra_send(struct ipv6_ra_state *ra)
1450 {
1451 if (time_msec() < ra->next_announce) {
1452 return ra->next_announce;
1453 }
1454
1455 uint64_t packet_stub[128 / 8];
1456 struct dp_packet packet;
1457 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1458 compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst,
1459 &ra->config->ipv6_src, &ra->config->ipv6_dst,
1460 255, ra->config->mo_flags, htons(IPV6_ND_RA_LIFETIME), 0, 0,
1461 ra->config->mtu);
1462
1463 for (int i = 0; i < ra->config->prefixes.n_ipv6_addrs; i++) {
1464 ovs_be128 addr;
1465 memcpy(&addr, &ra->config->prefixes.ipv6_addrs[i].addr, sizeof addr);
1466 packet_put_ra_prefix_opt(&packet,
1467 ra->config->prefixes.ipv6_addrs[i].plen,
1468 ra->config->la_flags, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME),
1469 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr);
1470 }
1471
1472 uint64_t ofpacts_stub[4096 / 8];
1473 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1474
1475 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1476 uint32_t dp_key = ra->metadata;
1477 uint32_t port_key = ra->port_key;
1478 put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
1479 put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
1480 put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
1481 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
1482 resubmit->in_port = OFPP_CONTROLLER;
1483 resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
1484
1485 struct ofputil_packet_out po = {
1486 .packet = dp_packet_data(&packet),
1487 .packet_len = dp_packet_size(&packet),
1488 .buffer_id = UINT32_MAX,
1489 .ofpacts = ofpacts.data,
1490 .ofpacts_len = ofpacts.size,
1491 };
1492
1493 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1494 enum ofp_version version = rconn_get_version(swconn);
1495 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1496 queue_msg(ofputil_encode_packet_out(&po, proto));
1497 dp_packet_uninit(&packet);
1498 ofpbuf_uninit(&ofpacts);
1499
1500 ra->next_announce = ipv6_ra_calc_next_announce(ra->config->min_interval,
1501 ra->config->max_interval);
1502
1503 return ra->next_announce;
1504 }
1505
/* Arranges for the poll loop to wake up at 'send_ipv6_ra_time', the earliest
 * time at which a periodic RA is due. */
static void
ipv6_ra_wait(void)
{
    poll_timer_wait_until(send_ipv6_ra_time);
}
1511
1512 static void
1513 send_ipv6_ras(struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
1514 struct ovsdb_idl_index *sbrec_port_binding_by_name,
1515 const struct hmap *local_datapaths)
1516 {
1517 struct shash_node *iter, *iter_next;
1518
1519 send_ipv6_ra_time = LLONG_MAX;
1520
1521 SHASH_FOR_EACH (iter, &ipv6_ras) {
1522 struct ipv6_ra_state *ra = iter->data;
1523 ra->delete_me = true;
1524 }
1525
1526 const struct local_datapath *ld;
1527 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
1528 struct sbrec_port_binding *target = sbrec_port_binding_index_init_row(
1529 sbrec_port_binding_by_datapath);
1530 sbrec_port_binding_index_set_datapath(target, ld->datapath);
1531
1532 struct sbrec_port_binding *pb;
1533 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
1534 sbrec_port_binding_by_datapath) {
1535 if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic", false)) {
1536 continue;
1537 }
1538
1539 const char *peer_s = smap_get(&pb->options, "peer");
1540 if (!peer_s) {
1541 continue;
1542 }
1543
1544 const struct sbrec_port_binding *peer
1545 = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
1546 if (!peer) {
1547 continue;
1548 }
1549
1550 struct ipv6_ra_config *config = ipv6_ra_update_config(pb);
1551 if (!config) {
1552 continue;
1553 }
1554
1555 struct ipv6_ra_state *ra
1556 = shash_find_data(&ipv6_ras, pb->logical_port);
1557 if (!ra) {
1558 ra = xzalloc(sizeof *ra);
1559 ra->config = config;
1560 ra->next_announce = ipv6_ra_calc_next_announce(
1561 ra->config->min_interval,
1562 ra->config->max_interval);
1563 shash_add(&ipv6_ras, pb->logical_port, ra);
1564 } else {
1565 ipv6_ra_config_delete(ra->config);
1566 ra->config = config;
1567 }
1568
1569 /* Peer is the logical switch port that the logical
1570 * router port is connected to. The RA is injected
1571 * into that logical switch port.
1572 */
1573 ra->port_key = peer->tunnel_key;
1574 ra->metadata = peer->datapath->tunnel_key;
1575 ra->delete_me = false;
1576
1577 long long int next_ra = ipv6_ra_send(ra);
1578 if (send_ipv6_ra_time > next_ra) {
1579 send_ipv6_ra_time = next_ra;
1580 }
1581 }
1582 sbrec_port_binding_index_destroy_row(target);
1583 }
1584
1585 /* Remove those that are no longer in the SB database */
1586 SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) {
1587 struct ipv6_ra_state *ra = iter->data;
1588 if (ra->delete_me) {
1589 shash_delete(&ipv6_ras, iter);
1590 ipv6_ra_delete(ra);
1591 }
1592 }
1593 }
1594
/* Registers the wakeup conditions of every pinctrl sub-component: buffered
 * put_mac_binding operations, the switch connection (both running it and
 * receiving from it), GARP announcements and periodic IPv6 RAs. */
void
pinctrl_wait(struct controller_ctx *ctx)
{
    wait_put_mac_bindings(ctx);
    rconn_run_wait(swconn);
    rconn_recv_wait(swconn);
    send_garp_wait();
    ipv6_ra_wait();
}
1604
/* Tears down the pinctrl module: destroys 'swconn' and releases the buffered
 * put_mac_binding operations, the GARP state and the IPv6 RA state. */
void
pinctrl_destroy(void)
{
    rconn_destroy(swconn);
    destroy_put_mac_bindings();
    destroy_send_garps();
    destroy_ipv6_ras();
}
1613 \f
1614 /* Implementation of the "put_arp" and "put_nd" OVN actions. These
1615 * actions send a packet to ovn-controller, using the flow as an API
1616 * (see actions.h for details). This code implements the actions by
1617 * updating the MAC_Binding table in the southbound database.
1618 *
1619 * This code could be a lot simpler if the database could always be updated,
1620 * but in fact we can only update it when ctx->ovnsb_idl_txn is nonnull. Thus,
1621 * we buffer up a few put_mac_bindings (but we don't keep them longer
1622 * than 1 second) and apply them whenever a database transaction is
1623 * available. */
1624
/* Buffered "put_mac_binding" operation. */
struct put_mac_binding {
    struct hmap_node hmap_node; /* In 'put_mac_bindings'. */

    long long int timestamp;    /* In milliseconds. */

    /* Key. */
    uint32_t dp_key;            /* Logical datapath key. */
    uint32_t port_key;          /* Logical input port key. */
    char ip_s[INET6_ADDRSTRLEN + 1];    /* IPv4 or IPv6 address in the text
                                         * form produced by inet_ntop(). */

    /* Value. */
    struct eth_addr mac;        /* Ethernet source of the triggering
                                 * packet. */
};

/* Contains "struct put_mac_binding"s. */
static struct hmap put_mac_bindings;
1642
/* Initializes 'put_mac_bindings' as an empty hash map. */
static void
init_put_mac_bindings(void)
{
    hmap_init(&put_mac_bindings);
}
1648
/* Frees every buffered put_mac_binding and then destroys the
 * 'put_mac_bindings' hash map itself. */
static void
destroy_put_mac_bindings(void)
{
    flush_put_mac_bindings();
    hmap_destroy(&put_mac_bindings);
}
1655
1656 static struct put_mac_binding *
1657 pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key,
1658 const char *ip_s, uint32_t hash)
1659 {
1660 struct put_mac_binding *pa;
1661 HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) {
1662 if (pa->dp_key == dp_key
1663 && pa->port_key == port_key
1664 && !strcmp(pa->ip_s, ip_s)) {
1665 return pa;
1666 }
1667 }
1668 return NULL;
1669 }
1670
1671 static void
1672 pinctrl_handle_put_mac_binding(const struct flow *md,
1673 const struct flow *headers, bool is_arp)
1674 {
1675 uint32_t dp_key = ntohll(md->metadata);
1676 uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
1677 char ip_s[INET6_ADDRSTRLEN];
1678
1679 if (is_arp) {
1680 ovs_be32 ip = htonl(md->regs[0]);
1681 inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s));
1682 } else {
1683 ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
1684 inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s));
1685 }
1686 uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key));
1687 struct put_mac_binding *pmb
1688 = pinctrl_find_put_mac_binding(dp_key, port_key, ip_s, hash);
1689 if (!pmb) {
1690 if (hmap_count(&put_mac_bindings) >= 1000) {
1691 COVERAGE_INC(pinctrl_drop_put_mac_binding);
1692 return;
1693 }
1694
1695 pmb = xmalloc(sizeof *pmb);
1696 hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash);
1697 pmb->dp_key = dp_key;
1698 pmb->port_key = port_key;
1699 ovs_strlcpy_arrays(pmb->ip_s, ip_s);
1700 }
1701 pmb->timestamp = time_msec();
1702 pmb->mac = headers->dl_src;
1703 }
1704
1705 static void
1706 run_put_mac_binding(struct controller_ctx *ctx,
1707 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
1708 struct ovsdb_idl_index *sbrec_port_binding_by_key,
1709 const struct sbrec_mac_binding_table *mac_binding_table,
1710 const struct put_mac_binding *pmb)
1711 {
1712 if (time_msec() > pmb->timestamp + 1000) {
1713 return;
1714 }
1715
1716 /* Convert logical datapath and logical port key into lport. */
1717 const struct sbrec_port_binding *pb = lport_lookup_by_key(
1718 sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
1719 pmb->dp_key, pmb->port_key);
1720 if (!pb) {
1721 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1722
1723 VLOG_WARN_RL(&rl, "unknown logical port with datapath %"PRIu32" "
1724 "and port %"PRIu32, pmb->dp_key, pmb->port_key);
1725 return;
1726 }
1727
1728 /* Convert ethernet argument to string form for database. */
1729 char mac_string[ETH_ADDR_STRLEN + 1];
1730 snprintf(mac_string, sizeof mac_string,
1731 ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac));
1732
1733 /* Check for an update an existing IP-MAC binding for this logical
1734 * port.
1735 *
1736 * XXX This is not very efficient. */
1737 const struct sbrec_mac_binding *b;
1738 SBREC_MAC_BINDING_TABLE_FOR_EACH (b, mac_binding_table) {
1739 if (!strcmp(b->logical_port, pb->logical_port)
1740 && !strcmp(b->ip, pmb->ip_s)) {
1741 if (strcmp(b->mac, mac_string)) {
1742 sbrec_mac_binding_set_mac(b, mac_string);
1743 }
1744 return;
1745 }
1746 }
1747
1748 /* Add new IP-MAC binding for this logical port. */
1749 b = sbrec_mac_binding_insert(ctx->ovnsb_idl_txn);
1750 sbrec_mac_binding_set_logical_port(b, pb->logical_port);
1751 sbrec_mac_binding_set_ip(b, pmb->ip_s);
1752 sbrec_mac_binding_set_mac(b, mac_string);
1753 sbrec_mac_binding_set_datapath(b, pb->datapath);
1754 }
1755
1756 static void
1757 run_put_mac_bindings(struct controller_ctx *ctx,
1758 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
1759 struct ovsdb_idl_index *sbrec_port_binding_by_key,
1760 const struct sbrec_mac_binding_table *mac_binding_table)
1761 {
1762 if (!ctx->ovnsb_idl_txn) {
1763 return;
1764 }
1765
1766 const struct put_mac_binding *pmb;
1767 HMAP_FOR_EACH (pmb, hmap_node, &put_mac_bindings) {
1768 run_put_mac_binding(ctx, sbrec_datapath_binding_by_key,
1769 sbrec_port_binding_by_key, mac_binding_table, pmb);
1770 }
1771 flush_put_mac_bindings();
1772 }
1773
1774 static void
1775 wait_put_mac_bindings(struct controller_ctx *ctx)
1776 {
1777 if (ctx->ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
1778 poll_immediate_wake();
1779 }
1780 }
1781
/* Removes and frees every buffered operation in 'put_mac_bindings', leaving
 * the hash map empty but still usable. */
static void
flush_put_mac_bindings(void)
{
    struct put_mac_binding *pmb;
    HMAP_FOR_EACH_POP (pmb, hmap_node, &put_mac_bindings) {
        free(pmb);
    }
}
1790 \f
1791 /*
1792 * Send gratuitous ARP for vif on localnet.
1793 *
1794 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
1795 * the port's mac,ip mapping. On localnet, such announcements are needed for
1796 * switches and routers on the broadcast segment to update their port-mac
1797 * and ARP tables.
1798 */
/* One pending gratuitous ARP announcement; the data stored in
 * 'send_garp_data'. */
struct garp_data {
    struct eth_addr ea;          /* Ethernet address of port. */
    ovs_be32 ipv4;               /* Ipv4 address of port. */
    long long int announce_time; /* Next announcement in ms; LLONG_MAX once
                                  * no further announcement is scheduled. */
    int backoff;                 /* Backoff for the next announcement, in
                                  * seconds; doubled after each one sent. */
    ofp_port_t ofport;           /* ofport used to output this GARP. */
    int tag;                     /* VLAN tag of this GARP packet, or -1. */
};

/* Contains GARPs to be sent. */
static struct shash send_garp_data;

/* Next GARP announcement in ms. */
static long long int send_garp_time;
1813
/* Initializes 'send_garp_data' to empty and marks that no GARP announcement
 * is currently scheduled. */
static void
init_send_garps(void)
{
    shash_init(&send_garp_data);
    send_garp_time = LLONG_MAX;
}
1820
/* Destroys 'send_garp_data' along with the garp_data entries stored in
 * it. */
static void
destroy_send_garps(void)
{
    shash_destroy_free_data(&send_garp_data);
}
1826
1827 static void
1828 add_garp(const char *name, ofp_port_t ofport, int tag,
1829 const struct eth_addr ea, ovs_be32 ip)
1830 {
1831 struct garp_data *garp = xmalloc(sizeof *garp);
1832 garp->ea = ea;
1833 garp->ipv4 = ip;
1834 garp->announce_time = time_msec() + 1000;
1835 garp->backoff = 1;
1836 garp->ofport = ofport;
1837 garp->tag = tag;
1838 shash_add(&send_garp_data, name, garp);
1839 }
1840
1841 /* Add or update a vif for which GARPs need to be announced. */
1842 static void
1843 send_garp_update(const struct sbrec_port_binding *binding_rec,
1844 struct simap *localnet_ofports,
1845 const struct hmap *local_datapaths,
1846 struct shash *nat_addresses)
1847 {
1848 /* Find the localnet ofport to send this GARP. */
1849 struct local_datapath *ld
1850 = get_local_datapath(local_datapaths,
1851 binding_rec->datapath->tunnel_key);
1852 if (!ld || !ld->localnet_port) {
1853 return;
1854 }
1855 ofp_port_t ofport = u16_to_ofp(simap_get(localnet_ofports,
1856 ld->localnet_port->logical_port));
1857 int tag = ld->localnet_port->n_tag ? *ld->localnet_port->tag : -1;
1858
1859 volatile struct garp_data *garp = NULL;
1860 /* Update GARP for NAT IP if it exists. Consider port bindings with type
1861 * "l3gateway" for logical switch ports attached to gateway routers, and
1862 * port bindings with type "patch" for logical switch ports attached to
1863 * distributed gateway ports. */
1864 if (!strcmp(binding_rec->type, "l3gateway")
1865 || !strcmp(binding_rec->type, "patch")) {
1866 struct lport_addresses *laddrs = NULL;
1867 while ((laddrs = shash_find_and_delete(nat_addresses,
1868 binding_rec->logical_port))) {
1869 int i;
1870 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
1871 char *name = xasprintf("%s-%s", binding_rec->logical_port,
1872 laddrs->ipv4_addrs[i].addr_s);
1873 garp = shash_find_data(&send_garp_data, name);
1874 if (garp) {
1875 garp->ofport = ofport;
1876 garp->tag = tag;
1877 } else {
1878 add_garp(name, ofport, tag, laddrs->ea,
1879 laddrs->ipv4_addrs[i].addr);
1880 }
1881 free(name);
1882 }
1883 destroy_lport_addresses(laddrs);
1884 free(laddrs);
1885 }
1886 return;
1887 }
1888
1889 /* Update GARP for vif if it exists. */
1890 garp = shash_find_data(&send_garp_data, binding_rec->logical_port);
1891 if (garp) {
1892 garp->ofport = ofport;
1893 return;
1894 }
1895
1896 /* Add GARP for new vif. */
1897 int i;
1898 for (i = 0; i < binding_rec->n_mac; i++) {
1899 struct lport_addresses laddrs;
1900 if (!extract_lsp_addresses(binding_rec->mac[i], &laddrs)
1901 || !laddrs.n_ipv4_addrs) {
1902 continue;
1903 }
1904
1905 add_garp(binding_rec->logical_port, ofport, tag,
1906 laddrs.ea, laddrs.ipv4_addrs[0].addr);
1907
1908 destroy_lport_addresses(&laddrs);
1909 break;
1910 }
1911 }
1912
1913 /* Remove a vif from GARP announcements. */
1914 static void
1915 send_garp_delete(const char *lport)
1916 {
1917 struct garp_data *garp = shash_find_and_delete(&send_garp_data, lport);
1918 free(garp);
1919 }
1920
1921 static long long int
1922 send_garp(struct garp_data *garp, long long int current_time)
1923 {
1924 if (current_time < garp->announce_time) {
1925 return garp->announce_time;
1926 }
1927
1928 /* Compose a GARP request packet. */
1929 uint64_t packet_stub[128 / 8];
1930 struct dp_packet packet;
1931 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1932 compose_arp(&packet, ARP_OP_REQUEST, garp->ea, eth_addr_zero,
1933 true, garp->ipv4, garp->ipv4);
1934
1935 /* Compose a GARP request packet's vlan if exist. */
1936 if (garp->tag >= 0) {
1937 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN), htons(garp->tag));
1938 }
1939
1940 /* Compose actions. The garp request is output on localnet ofport. */
1941 uint64_t ofpacts_stub[4096 / 8];
1942 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1943 enum ofp_version version = rconn_get_version(swconn);
1944 ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
1945
1946 struct ofputil_packet_out po = {
1947 .packet = dp_packet_data(&packet),
1948 .packet_len = dp_packet_size(&packet),
1949 .buffer_id = UINT32_MAX,
1950 .ofpacts = ofpacts.data,
1951 .ofpacts_len = ofpacts.size,
1952 };
1953 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1954 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1955 queue_msg(ofputil_encode_packet_out(&po, proto));
1956 dp_packet_uninit(&packet);
1957 ofpbuf_uninit(&ofpacts);
1958
1959 /* Set the next announcement. At most 5 announcements are sent for a
1960 * vif. */
1961 if (garp->backoff < 16) {
1962 garp->backoff *= 2;
1963 garp->announce_time = current_time + garp->backoff * 1000;
1964 } else {
1965 garp->announce_time = LLONG_MAX;
1966 }
1967 return garp->announce_time;
1968 }
1969
1970 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
1971 static void
1972 get_localnet_vifs_l3gwports(
1973 struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
1974 struct ovsdb_idl_index *sbrec_port_binding_by_name,
1975 const struct ovsrec_bridge *br_int,
1976 const struct sbrec_chassis *chassis,
1977 const struct hmap *local_datapaths,
1978 struct sset *localnet_vifs,
1979 struct simap *localnet_ofports,
1980 struct sset *local_l3gw_ports)
1981 {
1982 for (int i = 0; i < br_int->n_ports; i++) {
1983 const struct ovsrec_port *port_rec = br_int->ports[i];
1984 if (!strcmp(port_rec->name, br_int->name)) {
1985 continue;
1986 }
1987 const char *chassis_id = smap_get(&port_rec->external_ids,
1988 "ovn-chassis-id");
1989 if (chassis_id && !strcmp(chassis_id, chassis->name)) {
1990 continue;
1991 }
1992 const char *localnet = smap_get(&port_rec->external_ids,
1993 "ovn-localnet-port");
1994 for (int j = 0; j < port_rec->n_interfaces; j++) {
1995 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
1996 if (!iface_rec->n_ofport) {
1997 continue;
1998 }
1999 /* Get localnet port with its ofport. */
2000 if (localnet) {
2001 int64_t ofport = iface_rec->ofport[0];
2002 if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
2003 continue;
2004 }
2005 simap_put(localnet_ofports, localnet, ofport);
2006 continue;
2007 }
2008 /* Get localnet vif. */
2009 const char *iface_id = smap_get(&iface_rec->external_ids,
2010 "iface-id");
2011 if (!iface_id) {
2012 continue;
2013 }
2014 const struct sbrec_port_binding *pb
2015 = lport_lookup_by_name(sbrec_port_binding_by_name, iface_id);
2016 if (!pb) {
2017 continue;
2018 }
2019 struct local_datapath *ld
2020 = get_local_datapath(local_datapaths,
2021 pb->datapath->tunnel_key);
2022 if (ld && ld->localnet_port) {
2023 sset_add(localnet_vifs, iface_id);
2024 }
2025 }
2026 }
2027
2028 struct sbrec_port_binding *target = sbrec_port_binding_index_init_row(
2029 sbrec_port_binding_by_datapath);
2030
2031 const struct local_datapath *ld;
2032 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
2033 const struct sbrec_port_binding *pb;
2034
2035 if (!ld->localnet_port) {
2036 continue;
2037 }
2038
2039 /* Get l3gw ports. Consider port bindings with type "l3gateway"
2040 * that connect to gateway routers (if local), and consider port
2041 * bindings of type "patch" since they might connect to
2042 * distributed gateway ports with NAT addresses. */
2043
2044 sbrec_port_binding_index_set_datapath(target, ld->datapath);
2045 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
2046 sbrec_port_binding_by_datapath) {
2047 if ((ld->has_local_l3gateway && !strcmp(pb->type, "l3gateway"))
2048 || !strcmp(pb->type, "patch")) {
2049 sset_add(local_l3gw_ports, pb->logical_port);
2050 }
2051 }
2052 }
2053 sbrec_port_binding_index_destroy_row(target);
2054 }
2055
2056 static bool
2057 pinctrl_is_chassis_resident(struct ovsdb_idl_index *sbrec_port_binding_by_name,
2058 const struct sbrec_chassis *chassis,
2059 const struct chassis_index *chassis_index,
2060 const struct sset *active_tunnels,
2061 const char *port_name)
2062 {
2063 const struct sbrec_port_binding *pb
2064 = lport_lookup_by_name(sbrec_port_binding_by_name, port_name);
2065 if (!pb || !pb->chassis) {
2066 return false;
2067 }
2068 if (strcmp(pb->type, "chassisredirect")) {
2069 return pb->chassis == chassis;
2070 } else {
2071 struct ovs_list *gateway_chassis =
2072 gateway_chassis_get_ordered(pb, chassis_index);
2073 bool active = gateway_chassis_is_active(gateway_chassis,
2074 chassis,
2075 active_tunnels);
2076 gateway_chassis_destroy(gateway_chassis);
2077 return active;
2078 }
2079 }
2080
2081 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2082 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2083 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2084 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2085 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2086 *
2087 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2088 *
2089 * The caller must call destroy_lport_addresses() and free(*lport). */
2090 static bool
2091 extract_addresses_with_port(const char *addresses,
2092 struct lport_addresses *laddrs,
2093 char **lport)
2094 {
2095 int ofs;
2096 if (!extract_addresses(addresses, laddrs, &ofs)) {
2097 return false;
2098 } else if (ofs >= strlen(addresses)) {
2099 return true;
2100 }
2101
2102 struct lexer lexer;
2103 lexer_init(&lexer, addresses + ofs);
2104 lexer_get(&lexer);
2105
2106 if (lexer.error || lexer.token.type != LEX_T_ID
2107 || !lexer_match_id(&lexer, "is_chassis_resident")) {
2108 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2109 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
2110 lexer_destroy(&lexer);
2111 return true;
2112 }
2113
2114 if (!lexer_match(&lexer, LEX_T_LPAREN)) {
2115 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2116 VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
2117 "'is_chassis_resident' in address '%s'", addresses);
2118 lexer_destroy(&lexer);
2119 return false;
2120 }
2121
2122 if (lexer.token.type != LEX_T_STRING) {
2123 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2124 VLOG_INFO_RL(&rl,
2125 "Syntax error: expecting quoted string after"
2126 " 'is_chassis_resident' in address '%s'", addresses);
2127 lexer_destroy(&lexer);
2128 return false;
2129 }
2130
2131 *lport = xstrdup(lexer.token.s);
2132
2133 lexer_get(&lexer);
2134 if (!lexer_match(&lexer, LEX_T_RPAREN)) {
2135 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2136 VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted string in "
2137 "'is_chassis_resident()' in address '%s'",
2138 addresses);
2139 lexer_destroy(&lexer);
2140 return false;
2141 }
2142
2143 lexer_destroy(&lexer);
2144 return true;
2145 }
2146
2147 static void
2148 consider_nat_address(struct ovsdb_idl_index *sbrec_port_binding_by_name,
2149 const char *nat_address,
2150 const struct sbrec_port_binding *pb,
2151 struct sset *nat_address_keys,
2152 const struct sbrec_chassis *chassis,
2153 const struct chassis_index *chassis_index,
2154 const struct sset *active_tunnels,
2155 struct shash *nat_addresses)
2156 {
2157 struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
2158 char *lport = NULL;
2159 if (!extract_addresses_with_port(nat_address, laddrs, &lport)
2160 || (!lport && !strcmp(pb->type, "patch"))
2161 || (lport && !pinctrl_is_chassis_resident(
2162 sbrec_port_binding_by_name, chassis, chassis_index,
2163 active_tunnels, lport))) {
2164 destroy_lport_addresses(laddrs);
2165 free(laddrs);
2166 free(lport);
2167 return;
2168 }
2169 free(lport);
2170
2171 int i;
2172 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
2173 char *name = xasprintf("%s-%s", pb->logical_port,
2174 laddrs->ipv4_addrs[i].addr_s);
2175 sset_add(nat_address_keys, name);
2176 free(name);
2177 }
2178 shash_add(nat_addresses, pb->logical_port, laddrs);
2179 }
2180
2181 static void
2182 get_nat_addresses_and_keys(struct ovsdb_idl_index *sbrec_port_binding_by_name,
2183 struct sset *nat_address_keys,
2184 struct sset *local_l3gw_ports,
2185 const struct sbrec_chassis *chassis,
2186 const struct chassis_index *chassis_index,
2187 const struct sset *active_tunnels,
2188 struct shash *nat_addresses)
2189 {
2190 const char *gw_port;
2191 SSET_FOR_EACH(gw_port, local_l3gw_ports) {
2192 const struct sbrec_port_binding *pb;
2193
2194 pb = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
2195 if (!pb) {
2196 continue;
2197 }
2198
2199 if (pb->n_nat_addresses) {
2200 for (int i = 0; i < pb->n_nat_addresses; i++) {
2201 consider_nat_address(sbrec_port_binding_by_name,
2202 pb->nat_addresses[i], pb,
2203 nat_address_keys, chassis,
2204 chassis_index, active_tunnels,
2205 nat_addresses);
2206 }
2207 } else {
2208 /* Continue to support options:nat-addresses for version
2209 * upgrade. */
2210 const char *nat_addresses_options = smap_get(&pb->options,
2211 "nat-addresses");
2212 if (nat_addresses_options) {
2213 consider_nat_address(sbrec_port_binding_by_name,
2214 nat_addresses_options, pb,
2215 nat_address_keys, chassis,
2216 chassis_index, active_tunnels,
2217 nat_addresses);
2218 }
2219 }
2220 }
2221 }
2222
2223 static void
2224 send_garp_wait(void)
2225 {
2226 poll_timer_wait_until(send_garp_time);
2227 }
2228
2229 static void
2230 send_garp_run(struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
2231 struct ovsdb_idl_index *sbrec_port_binding_by_name,
2232 const struct ovsrec_bridge *br_int,
2233 const struct sbrec_chassis *chassis,
2234 const struct chassis_index *chassis_index,
2235 const struct hmap *local_datapaths,
2236 const struct sset *active_tunnels)
2237 {
2238 struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
2239 struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
2240 struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
2241 struct simap localnet_ofports = SIMAP_INITIALIZER(&localnet_ofports);
2242 struct shash nat_addresses;
2243
2244 shash_init(&nat_addresses);
2245
2246 get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
2247 sbrec_port_binding_by_name,
2248 br_int, chassis, local_datapaths,
2249 &localnet_vifs, &localnet_ofports,
2250 &local_l3gw_ports);
2251
2252 get_nat_addresses_and_keys(sbrec_port_binding_by_name,
2253 &nat_ip_keys, &local_l3gw_ports,
2254 chassis, chassis_index, active_tunnels,
2255 &nat_addresses);
2256 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2257 struct shash_node *iter, *next;
2258 SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
2259 if (!sset_contains(&localnet_vifs, iter->name) &&
2260 !sset_contains(&nat_ip_keys, iter->name)) {
2261 send_garp_delete(iter->name);
2262 }
2263 }
2264
2265 /* Update send_garp_data. */
2266 const char *iface_id;
2267 SSET_FOR_EACH (iface_id, &localnet_vifs) {
2268 const struct sbrec_port_binding *pb = lport_lookup_by_name(
2269 sbrec_port_binding_by_name, iface_id);
2270 if (pb) {
2271 send_garp_update(pb, &localnet_ofports, local_datapaths,
2272 &nat_addresses);
2273 }
2274 }
2275
2276 /* Update send_garp_data for nat-addresses. */
2277 const char *gw_port;
2278 SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
2279 const struct sbrec_port_binding *pb
2280 = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
2281 if (pb) {
2282 send_garp_update(pb, &localnet_ofports, local_datapaths,
2283 &nat_addresses);
2284 }
2285 }
2286
2287 /* Send GARPs, and update the next announcement. */
2288 long long int current_time = time_msec();
2289 send_garp_time = LLONG_MAX;
2290 SHASH_FOR_EACH (iter, &send_garp_data) {
2291 long long int next_announce = send_garp(iter->data, current_time);
2292 if (send_garp_time > next_announce) {
2293 send_garp_time = next_announce;
2294 }
2295 }
2296 sset_destroy(&localnet_vifs);
2297 sset_destroy(&local_l3gw_ports);
2298 simap_destroy(&localnet_ofports);
2299
2300 SHASH_FOR_EACH_SAFE (iter, next, &nat_addresses) {
2301 struct lport_addresses *laddrs = iter->data;
2302 destroy_lport_addresses(laddrs);
2303 shash_delete(&nat_addresses, iter);
2304 free(laddrs);
2305 }
2306 shash_destroy(&nat_addresses);
2307
2308 sset_destroy(&nat_ip_keys);
2309 }
2310
2311 static void
2312 reload_metadata(struct ofpbuf *ofpacts, const struct match *md)
2313 {
2314 enum mf_field_id md_fields[] = {
2315 #if FLOW_N_REGS == 16
2316 MFF_REG0,
2317 MFF_REG1,
2318 MFF_REG2,
2319 MFF_REG3,
2320 MFF_REG4,
2321 MFF_REG5,
2322 MFF_REG6,
2323 MFF_REG7,
2324 MFF_REG8,
2325 MFF_REG9,
2326 MFF_REG10,
2327 MFF_REG11,
2328 MFF_REG12,
2329 MFF_REG13,
2330 MFF_REG14,
2331 MFF_REG15,
2332 #else
2333 #error
2334 #endif
2335 MFF_METADATA,
2336 };
2337 for (size_t i = 0; i < ARRAY_SIZE(md_fields); i++) {
2338 const struct mf_field *field = mf_from_id(md_fields[i]);
2339 if (!mf_is_all_wild(field, &md->wc)) {
2340 union mf_value value;
2341 mf_get_value(field, &md->flow, &value);
2342 ofpact_put_set_field(ofpacts, field, &value, NULL);
2343 }
2344 }
2345 }
2346
2347 static void
2348 pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md,
2349 struct ofpbuf *userdata, bool is_router)
2350 {
2351 /* This action only works for IPv6 ND packets, and the switch should only
2352 * send us ND packets this way, but check here just to be sure. */
2353 if (!is_nd(ip_flow, NULL)) {
2354 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2355 VLOG_WARN_RL(&rl, "NA action on non-ND packet");
2356 return;
2357 }
2358
2359 uint64_t packet_stub[128 / 8];
2360 struct dp_packet packet;
2361 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2362
2363 /* These flags are not exactly correct. Look at section 7.2.4
2364 * of RFC 4861. */
2365 uint32_t rso_flags = ND_RSO_SOLICITED | ND_RSO_OVERRIDE;
2366 if (is_router) {
2367 rso_flags |= ND_RSO_ROUTER;
2368 }
2369 compose_nd_na(&packet, ip_flow->dl_dst, ip_flow->dl_src,
2370 &ip_flow->nd_target, &ip_flow->ipv6_src,
2371 htonl(rso_flags));
2372
2373 /* Reload previous packet metadata and set actions from userdata. */
2374 set_actions_and_enqueue_msg(&packet, md, userdata);
2375 dp_packet_uninit(&packet);
2376 }
2377
2378 static void
2379 pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md,
2380 struct ofpbuf *userdata)
2381 {
2382 /* This action only works for IPv6 packets. */
2383 if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) {
2384 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2385 VLOG_WARN_RL(&rl, "NS action on non-IPv6 packet");
2386 return;
2387 }
2388
2389 uint64_t packet_stub[128 / 8];
2390 struct dp_packet packet;
2391 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2392
2393 compose_nd_ns(&packet, ip_flow->dl_src, &ip_flow->ipv6_src,
2394 &ip_flow->ipv6_dst);
2395
2396 /* Reload previous packet metadata and set actions from userdata. */
2397 set_actions_and_enqueue_msg(&packet, md, userdata);
2398 dp_packet_uninit(&packet);
2399 }
2400
2401 static void
2402 pinctrl_handle_put_nd_ra_opts(
2403 const struct flow *in_flow, struct dp_packet *pkt_in,
2404 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
2405 struct ofpbuf *continuation)
2406 {
2407 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2408 enum ofp_version version = rconn_get_version(swconn);
2409 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
2410 struct dp_packet *pkt_out_ptr = NULL;
2411 uint32_t success = 0;
2412
2413 /* Parse result field. */
2414 const struct mf_field *f;
2415 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
2416 if (ofperr) {
2417 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
2418 goto exit;
2419 }
2420
2421 /* Parse result offset. */
2422 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
2423 if (!ofsp) {
2424 VLOG_WARN_RL(&rl, "offset not present in the userdata");
2425 goto exit;
2426 }
2427
2428 /* Check that the result is valid and writable. */
2429 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
2430 ofperr = mf_check_dst(&dst, NULL);
2431 if (ofperr) {
2432 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
2433 goto exit;
2434 }
2435
2436 if (!userdata->size) {
2437 VLOG_WARN_RL(&rl, "IPv6 ND RA options not present in the userdata");
2438 goto exit;
2439 }
2440
2441 if (!is_icmpv6(in_flow, NULL) || in_flow->tp_dst != htons(0) ||
2442 in_flow->tp_src != htons(ND_ROUTER_SOLICIT)) {
2443 VLOG_WARN_RL(&rl, "put_nd_ra action on invalid or unsupported packet");
2444 goto exit;
2445 }
2446
2447 size_t new_packet_size = pkt_in->l4_ofs + userdata->size;
2448 struct dp_packet pkt_out;
2449 dp_packet_init(&pkt_out, new_packet_size);
2450 dp_packet_clear(&pkt_out);
2451 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
2452 pkt_out_ptr = &pkt_out;
2453
2454 /* Copy L2 and L3 headers from pkt_in. */
2455 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
2456 pkt_in->l4_ofs);
2457
2458 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
2459 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
2460 pkt_out.l3_ofs = pkt_in->l3_ofs;
2461 pkt_out.l4_ofs = pkt_in->l4_ofs;
2462
2463 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2464 dp_packet_put(&pkt_out, userdata->data, userdata->size);
2465
2466 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2467 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
2468 nh->ip6_plen = htons(userdata->size);
2469 struct ovs_ra_msg *ra = dp_packet_l4(&pkt_out);
2470 ra->icmph.icmp6_cksum = 0;
2471 uint32_t icmp_csum = packet_csum_pseudoheader6(nh);
2472 ra->icmph.icmp6_cksum = csum_finish(csum_continue(
2473 icmp_csum, ra, userdata->size));
2474 pin->packet = dp_packet_data(&pkt_out);
2475 pin->packet_len = dp_packet_size(&pkt_out);
2476 success = 1;
2477
2478 exit:
2479 if (!ofperr) {
2480 union mf_subvalue sv;
2481 sv.u8_val = success;
2482 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
2483 }
2484 queue_msg(ofputil_encode_resume(pin, continuation, proto));
2485 dp_packet_uninit(pkt_out_ptr);
2486 }