]> git.proxmox.com Git - mirror_ovs.git/blob - ovn/controller/pinctrl.c
OVN: update RA next_announce according to {min, max}_interval
[mirror_ovs.git] / ovn / controller / pinctrl.c
1 /* Copyright (c) 2015, 2016, 2017 Red Hat, Inc.
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "pinctrl.h"
20
21 #include "coverage.h"
22 #include "csum.h"
23 #include "dirs.h"
24 #include "dp-packet.h"
25 #include "flow.h"
26 #include "gchassis.h"
27 #include "lport.h"
28 #include "nx-match.h"
29 #include "ovn-controller.h"
30 #include "lib/packets.h"
31 #include "lib/sset.h"
32 #include "openvswitch/ofp-actions.h"
33 #include "openvswitch/ofp-msgs.h"
34 #include "openvswitch/ofp-packet.h"
35 #include "openvswitch/ofp-print.h"
36 #include "openvswitch/ofp-switch.h"
37 #include "openvswitch/ofp-util.h"
38 #include "openvswitch/vlog.h"
39
40 #include "lib/dhcp.h"
41 #include "ovn-controller.h"
42 #include "ovn/actions.h"
43 #include "ovn/lex.h"
44 #include "ovn/lib/acl-log.h"
45 #include "ovn/lib/logical-fields.h"
46 #include "ovn/lib/ovn-l7.h"
47 #include "ovn/lib/ovn-util.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "socket-util.h"
51 #include "timeval.h"
52 #include "vswitch-idl.h"
53 #include "lflow.h"
54
VLOG_DEFINE_THIS_MODULE(pinctrl);

/* OpenFlow connection to the switch. */
static struct rconn *swconn;

/* Last seen sequence number for 'swconn'.  When this differs from
 * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
static unsigned int conn_seq_no;

/* Forward declarations of file-local helpers. */
static void init_buffered_packets_map(void);
static void destroy_buffered_packets_map(void);

static void pinctrl_handle_put_mac_binding(const struct flow *md,
                                           const struct flow *headers,
                                           bool is_arp);
static void init_put_mac_bindings(void);
static void destroy_put_mac_bindings(void);
static void run_put_mac_bindings(
    struct ovsdb_idl_txn *ovnsb_idl_txn,
    struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
    struct ovsdb_idl_index *sbrec_port_binding_by_key,
    struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip);
static void wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn);
static void flush_put_mac_bindings(void);

static void init_send_garps(void);
static void destroy_send_garps(void);
static void send_garp_wait(void);
static void send_garp_run(
    struct ovsdb_idl_index *sbrec_chassis_by_name,
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    const struct ovsrec_bridge *,
    const struct sbrec_chassis *,
    const struct hmap *local_datapaths,
    const struct sset *active_tunnels);
static void pinctrl_handle_nd_na(const struct flow *ip_flow,
                                 const struct match *md,
                                 struct ofpbuf *userdata,
                                 bool is_router);
static void reload_metadata(struct ofpbuf *ofpacts,
                            const struct match *md);
static void pinctrl_handle_put_nd_ra_opts(
    const struct flow *ip_flow, struct dp_packet *pkt_in,
    struct ofputil_packet_in *pin, struct ofpbuf *userdata,
    struct ofpbuf *continuation);
static void pinctrl_handle_nd_ns(const struct flow *ip_flow,
                                 struct dp_packet *pkt_in,
                                 const struct match *md,
                                 struct ofpbuf *userdata);
static void init_ipv6_ras(void);
static void destroy_ipv6_ras(void);
static void ipv6_ra_wait(void);
static void send_ipv6_ras(
    struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
    struct ovsdb_idl_index *sbrec_port_binding_by_name,
    const struct hmap *local_datapaths);

COVERAGE_DEFINE(pinctrl_drop_put_mac_binding);
COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map);
116
117 void
118 pinctrl_init(void)
119 {
120 swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
121 conn_seq_no = 0;
122 init_put_mac_bindings();
123 init_send_garps();
124 init_ipv6_ras();
125 init_buffered_packets_map();
126 }
127
128 static ovs_be32
129 queue_msg(struct ofpbuf *msg)
130 {
131 const struct ofp_header *oh = msg->data;
132 ovs_be32 xid = oh->xid;
133
134 rconn_send(swconn, msg, NULL);
135 return xid;
136 }
137
138 /* Sets up global 'swconn', a newly (re)connected connection to a switch. */
139 static void
140 pinctrl_setup(void)
141 {
142 /* Fetch the switch configuration. The response later will allow us to
143 * change the miss_send_len to UINT16_MAX, so that we can enable
144 * asynchronous messages. */
145 queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
146 rconn_get_version(swconn), 0));
147
148 /* Set a packet-in format that supports userdata. */
149 queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
150 OFPUTIL_PACKET_IN_NXT2));
151 }
152
153 static void
154 set_switch_config(struct rconn *swconn_,
155 const struct ofputil_switch_config *config)
156 {
157 enum ofp_version version = rconn_get_version(swconn_);
158 struct ofpbuf *request = ofputil_encode_set_config(config, version);
159 queue_msg(request);
160 }
161
162 static void
163 set_actions_and_enqueue_msg(const struct dp_packet *packet,
164 const struct match *md,
165 struct ofpbuf *userdata)
166 {
167 /* Copy metadata from 'md' into the packet-out via "set_field"
168 * actions, then add actions from 'userdata'.
169 */
170 uint64_t ofpacts_stub[4096 / 8];
171 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
172 enum ofp_version version = rconn_get_version(swconn);
173
174 reload_metadata(&ofpacts, md);
175 enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
176 version, NULL, NULL,
177 &ofpacts);
178 if (error) {
179 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
180 VLOG_WARN_RL(&rl, "failed to parse actions from userdata (%s)",
181 ofperr_to_string(error));
182 ofpbuf_uninit(&ofpacts);
183 return;
184 }
185
186 struct ofputil_packet_out po = {
187 .packet = dp_packet_data(packet),
188 .packet_len = dp_packet_size(packet),
189 .buffer_id = UINT32_MAX,
190 .ofpacts = ofpacts.data,
191 .ofpacts_len = ofpacts.size,
192 };
193 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
194 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
195 queue_msg(ofputil_encode_packet_out(&po, proto));
196 ofpbuf_uninit(&ofpacts);
197 }
198
199 struct buffer_info {
200 struct ofpbuf ofpacts;
201 struct dp_packet *p;
202 };
203
204 #define BUFFER_QUEUE_DEPTH 4
205 struct buffered_packets {
206 struct hmap_node hmap_node;
207
208 /* key */
209 struct in6_addr ip;
210
211 long long int timestamp;
212
213 struct buffer_info data[BUFFER_QUEUE_DEPTH];
214 uint32_t head, tail;
215 };
216
217 static struct hmap buffered_packets_map;
218
219 static void
220 init_buffered_packets_map(void)
221 {
222 hmap_init(&buffered_packets_map);
223 }
224
225 static void
226 destroy_buffered_packets(struct buffered_packets *bp)
227 {
228 struct buffer_info *bi;
229
230 while (bp->head != bp->tail) {
231 bi = &bp->data[bp->head];
232 dp_packet_delete(bi->p);
233 ofpbuf_uninit(&bi->ofpacts);
234
235 bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
236 }
237 hmap_remove(&buffered_packets_map, &bp->hmap_node);
238 free(bp);
239 }
240
241 static void
242 destroy_buffered_packets_map(void)
243 {
244 struct buffered_packets *bp, *next;
245 HMAP_FOR_EACH_SAFE (bp, next, hmap_node, &buffered_packets_map) {
246 destroy_buffered_packets(bp);
247 }
248 hmap_destroy(&buffered_packets_map);
249 }
250
251 static void
252 buffered_push_packet(struct buffered_packets *bp,
253 struct dp_packet *packet,
254 const struct match *md)
255 {
256 uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH;
257 struct buffer_info *bi = &bp->data[bp->tail];
258
259 ofpbuf_init(&bi->ofpacts, 4096);
260
261 reload_metadata(&bi->ofpacts, md);
262 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts);
263 resubmit->in_port = OFPP_CONTROLLER;
264 resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
265
266 bi->p = packet;
267
268 if (next == bp->head) {
269 bi = &bp->data[bp->head];
270 dp_packet_delete(bi->p);
271 ofpbuf_uninit(&bi->ofpacts);
272 bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
273 }
274 bp->tail = next;
275 }
276
277 static void
278 buffered_send_packets(struct buffered_packets *bp, struct eth_addr *addr)
279 {
280 enum ofp_version version = rconn_get_version(swconn);
281 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
282
283 while (bp->head != bp->tail) {
284 struct buffer_info *bi = &bp->data[bp->head];
285 struct eth_header *eth = dp_packet_data(bi->p);
286
287 eth->eth_dst = *addr;
288 struct ofputil_packet_out po = {
289 .packet = dp_packet_data(bi->p),
290 .packet_len = dp_packet_size(bi->p),
291 .buffer_id = UINT32_MAX,
292 .ofpacts = bi->ofpacts.data,
293 .ofpacts_len = bi->ofpacts.size,
294 };
295 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
296 queue_msg(ofputil_encode_packet_out(&po, proto));
297
298 ofpbuf_uninit(&bi->ofpacts);
299 dp_packet_delete(bi->p);
300
301 bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH;
302 }
303 }
304
305 #define BUFFER_MAP_TIMEOUT 10000
306 static void
307 buffered_packets_map_gc(void)
308 {
309 struct buffered_packets *cur_qp, *next_qp;
310 long long int now = time_msec();
311
312 HMAP_FOR_EACH_SAFE (cur_qp, next_qp, hmap_node, &buffered_packets_map) {
313 if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) {
314 destroy_buffered_packets(cur_qp);
315 }
316 }
317 }
318
319 static struct buffered_packets *
320 pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
321 {
322 struct buffered_packets *qp;
323
324 HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
325 &buffered_packets_map) {
326 if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
327 return qp;
328 }
329 }
330 return NULL;
331 }
332
333 static int
334 pinctrl_handle_buffered_packets(const struct flow *ip_flow,
335 struct dp_packet *pkt_in,
336 const struct match *md, bool is_arp)
337 {
338 struct buffered_packets *bp;
339 struct dp_packet *clone;
340 struct in6_addr addr;
341
342 if (is_arp) {
343 addr = in6_addr_mapped_ipv4(ip_flow->nw_dst);
344 } else {
345 addr = ip_flow->ipv6_dst;
346 }
347
348 uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
349 bp = pinctrl_find_buffered_packets(&addr, hash);
350 if (!bp) {
351 if (hmap_count(&buffered_packets_map) >= 1000) {
352 COVERAGE_INC(pinctrl_drop_buffered_packets_map);
353 return -ENOMEM;
354 }
355
356 bp = xmalloc(sizeof *bp);
357 hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
358 bp->head = bp->tail = 0;
359 bp->ip = addr;
360 }
361 bp->timestamp = time_msec();
362 /* clone the packet to send it later with correct L2 address */
363 clone = dp_packet_clone_data(dp_packet_data(pkt_in),
364 dp_packet_size(pkt_in));
365 buffered_push_packet(bp, clone, md);
366
367 return 0;
368 }
369
370 static void
371 pinctrl_handle_arp(const struct flow *ip_flow, struct dp_packet *pkt_in,
372 const struct match *md, struct ofpbuf *userdata)
373 {
374 /* This action only works for IP packets, and the switch should only send
375 * us IP packets this way, but check here just to be sure. */
376 if (ip_flow->dl_type != htons(ETH_TYPE_IP)) {
377 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
378 VLOG_WARN_RL(&rl, "ARP action on non-IP packet (Ethertype %"PRIx16")",
379 ntohs(ip_flow->dl_type));
380 return;
381 }
382
383 pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, true);
384
385 /* Compose an ARP packet. */
386 uint64_t packet_stub[128 / 8];
387 struct dp_packet packet;
388 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
389 compose_arp__(&packet);
390
391 struct eth_header *eth = dp_packet_eth(&packet);
392 eth->eth_dst = ip_flow->dl_dst;
393 eth->eth_src = ip_flow->dl_src;
394
395 struct arp_eth_header *arp = dp_packet_l3(&packet);
396 arp->ar_op = htons(ARP_OP_REQUEST);
397 arp->ar_sha = ip_flow->dl_src;
398 put_16aligned_be32(&arp->ar_spa, ip_flow->nw_src);
399 arp->ar_tha = eth_addr_zero;
400 put_16aligned_be32(&arp->ar_tpa, ip_flow->nw_dst);
401
402 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
403 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
404 ip_flow->vlans[0].tci);
405 }
406
407 set_actions_and_enqueue_msg(&packet, md, userdata);
408 dp_packet_uninit(&packet);
409 }
410
411 static void
412 pinctrl_handle_icmp(const struct flow *ip_flow, struct dp_packet *pkt_in,
413 const struct match *md, struct ofpbuf *userdata)
414 {
415 /* This action only works for IP packets, and the switch should only send
416 * us IP packets this way, but check here just to be sure. */
417 if (ip_flow->dl_type != htons(ETH_TYPE_IP) &&
418 ip_flow->dl_type != htons(ETH_TYPE_IPV6)) {
419 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
420 VLOG_WARN_RL(&rl,
421 "ICMP action on non-IP packet (eth_type 0x%"PRIx16")",
422 ntohs(ip_flow->dl_type));
423 return;
424 }
425
426 uint64_t packet_stub[128 / 8];
427 struct dp_packet packet;
428
429 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
430 dp_packet_clear(&packet);
431 packet.packet_type = htonl(PT_ETH);
432
433 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
434 eh->eth_dst = ip_flow->dl_dst;
435 eh->eth_src = ip_flow->dl_src;
436
437 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IP)) {
438 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
439
440 eh->eth_type = htons(ETH_TYPE_IP);
441 dp_packet_set_l3(&packet, nh);
442 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
443 nh->ip_tot_len = htons(sizeof(struct ip_header) +
444 sizeof(struct icmp_header));
445 nh->ip_proto = IPPROTO_ICMP;
446 nh->ip_frag_off = htons(IP_DF);
447 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
448 ip_flow->nw_tos, 255);
449
450 struct icmp_header *ih = dp_packet_put_zeros(&packet, sizeof *ih);
451 dp_packet_set_l4(&packet, ih);
452 packet_set_icmp(&packet, ICMP4_DST_UNREACH, 1);
453 } else {
454 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
455 struct icmp6_error_header *ih;
456 uint32_t icmpv6_csum;
457
458 eh->eth_type = htons(ETH_TYPE_IPV6);
459 dp_packet_set_l3(&packet, nh);
460 nh->ip6_vfc = 0x60;
461 nh->ip6_nxt = IPPROTO_ICMPV6;
462 nh->ip6_plen = htons(sizeof(*nh) + ICMP6_ERROR_HEADER_LEN);
463 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
464 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
465
466 ih = dp_packet_put_zeros(&packet, sizeof *ih);
467 dp_packet_set_l4(&packet, ih);
468 ih->icmp6_base.icmp6_type = ICMP6_DST_UNREACH;
469 ih->icmp6_base.icmp6_code = 1;
470 ih->icmp6_base.icmp6_cksum = 0;
471
472 uint8_t *data = dp_packet_put_zeros(&packet, sizeof *nh);
473 memcpy(data, dp_packet_l3(pkt_in), sizeof(*nh));
474
475 icmpv6_csum = packet_csum_pseudoheader6(dp_packet_l3(&packet));
476 ih->icmp6_base.icmp6_cksum = csum_finish(
477 csum_continue(icmpv6_csum, ih,
478 sizeof(*nh) + ICMP6_ERROR_HEADER_LEN));
479 }
480
481 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
482 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
483 ip_flow->vlans[0].tci);
484 }
485
486 set_actions_and_enqueue_msg(&packet, md, userdata);
487 dp_packet_uninit(&packet);
488 }
489
490 static void
491 pinctrl_handle_tcp_reset(const struct flow *ip_flow, struct dp_packet *pkt_in,
492 const struct match *md, struct ofpbuf *userdata)
493 {
494 /* This action only works for TCP segments, and the switch should only send
495 * us TCP segments this way, but check here just to be sure. */
496 if (ip_flow->nw_proto != IPPROTO_TCP) {
497 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
498 VLOG_WARN_RL(&rl, "TCP_RESET action on non-TCP packet");
499 return;
500 }
501
502 uint64_t packet_stub[128 / 8];
503 struct dp_packet packet;
504
505 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
506 dp_packet_clear(&packet);
507 packet.packet_type = htonl(PT_ETH);
508
509 struct eth_header *eh = dp_packet_put_zeros(&packet, sizeof *eh);
510 eh->eth_dst = ip_flow->dl_dst;
511 eh->eth_src = ip_flow->dl_src;
512
513 if (get_dl_type(ip_flow) == htons(ETH_TYPE_IPV6)) {
514 struct ip6_hdr *nh = dp_packet_put_zeros(&packet, sizeof *nh);
515
516 eh->eth_type = htons(ETH_TYPE_IPV6);
517 dp_packet_set_l3(&packet, nh);
518 nh->ip6_vfc = 0x60;
519 nh->ip6_nxt = IPPROTO_TCP;
520 nh->ip6_plen = htons(TCP_HEADER_LEN);
521 packet_set_ipv6(&packet, &ip_flow->ipv6_src, &ip_flow->ipv6_dst,
522 ip_flow->nw_tos, ip_flow->ipv6_label, 255);
523 } else {
524 struct ip_header *nh = dp_packet_put_zeros(&packet, sizeof *nh);
525
526 eh->eth_type = htons(ETH_TYPE_IP);
527 dp_packet_set_l3(&packet, nh);
528 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
529 nh->ip_tot_len = htons(IP_HEADER_LEN + TCP_HEADER_LEN);
530 nh->ip_proto = IPPROTO_TCP;
531 nh->ip_frag_off = htons(IP_DF);
532 packet_set_ipv4(&packet, ip_flow->nw_src, ip_flow->nw_dst,
533 ip_flow->nw_tos, 255);
534 }
535
536 struct tcp_header *th = dp_packet_put_zeros(&packet, sizeof *th);
537 struct tcp_header *tcp_in = dp_packet_l4(pkt_in);
538 dp_packet_set_l4(&packet, th);
539 th->tcp_ctl = TCP_CTL(TCP_RST, 5);
540 if (ip_flow->tcp_flags & htons(TCP_ACK)) {
541 th->tcp_seq = tcp_in->tcp_ack;
542 } else {
543 uint32_t tcp_seq, ack_seq, tcp_len;
544
545 tcp_seq = ntohl(get_16aligned_be32(&tcp_in->tcp_seq));
546 tcp_len = TCP_OFFSET(tcp_in->tcp_ctl) * 4;
547 ack_seq = tcp_seq + dp_packet_l4_size(pkt_in) - tcp_len;
548 put_16aligned_be32(&th->tcp_ack, htonl(ack_seq));
549 put_16aligned_be32(&th->tcp_seq, 0);
550 }
551 packet_set_tcp_port(&packet, ip_flow->tp_dst, ip_flow->tp_src);
552
553 if (ip_flow->vlans[0].tci & htons(VLAN_CFI)) {
554 eth_push_vlan(&packet, htons(ETH_TYPE_VLAN_8021Q),
555 ip_flow->vlans[0].tci);
556 }
557
558 set_actions_and_enqueue_msg(&packet, md, userdata);
559 dp_packet_uninit(&packet);
560 }
561
562 static void
563 pinctrl_handle_put_dhcp_opts(
564 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
565 struct ofpbuf *userdata, struct ofpbuf *continuation)
566 {
567 enum ofp_version version = rconn_get_version(swconn);
568 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
569 struct dp_packet *pkt_out_ptr = NULL;
570 uint32_t success = 0;
571
572 /* Parse result field. */
573 const struct mf_field *f;
574 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
575 if (ofperr) {
576 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
577 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
578 goto exit;
579 }
580
581 /* Parse result offset and offer IP. */
582 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
583 ovs_be32 *offer_ip = ofpbuf_try_pull(userdata, sizeof *offer_ip);
584 if (!ofsp || !offer_ip) {
585 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
586 VLOG_WARN_RL(&rl, "offset or offer_ip not present in the userdata");
587 goto exit;
588 }
589
590 /* Check that the result is valid and writable. */
591 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
592 ofperr = mf_check_dst(&dst, NULL);
593 if (ofperr) {
594 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
595 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
596 goto exit;
597 }
598
599 if (!userdata->size) {
600 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
601 VLOG_WARN_RL(&rl, "DHCP options not present in the userdata");
602 goto exit;
603 }
604
605 /* Validate the DHCP request packet.
606 * Format of the DHCP packet is
607 * ------------------------------------------------------------------------
608 *| UDP HEADER | DHCP HEADER | 4 Byte DHCP Cookie | DHCP OPTIONS(var len)|
609 * ------------------------------------------------------------------------
610 */
611
612 const char *end = (char *)dp_packet_l4(pkt_in) + dp_packet_l4_size(pkt_in);
613 const char *in_dhcp_ptr = dp_packet_get_udp_payload(pkt_in);
614 if (!in_dhcp_ptr) {
615 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
616 VLOG_WARN_RL(&rl, "Invalid or incomplete DHCP packet received");
617 goto exit;
618 }
619
620 const struct dhcp_header *in_dhcp_data
621 = (const struct dhcp_header *) in_dhcp_ptr;
622 in_dhcp_ptr += sizeof *in_dhcp_data;
623 if (in_dhcp_ptr > end) {
624 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
625 VLOG_WARN_RL(&rl, "Invalid or incomplete DHCP packet received, "
626 "bad data length");
627 goto exit;
628 }
629 if (in_dhcp_data->op != DHCP_OP_REQUEST) {
630 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
631 VLOG_WARN_RL(&rl, "Invalid opcode in the DHCP packet: %d",
632 in_dhcp_data->op);
633 goto exit;
634 }
635
636 /* DHCP options follow the DHCP header. The first 4 bytes of the DHCP
637 * options is the DHCP magic cookie followed by the actual DHCP options.
638 */
639 ovs_be32 magic_cookie = htonl(DHCP_MAGIC_COOKIE);
640 if (in_dhcp_ptr + sizeof magic_cookie > end ||
641 get_unaligned_be32((const void *) in_dhcp_ptr) != magic_cookie) {
642 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
643 VLOG_WARN_RL(&rl, "DHCP magic cookie not present in the DHCP packet");
644 goto exit;
645 }
646 in_dhcp_ptr += sizeof magic_cookie;
647
648 const uint8_t *in_dhcp_msg_type = NULL;
649 ovs_be32 request_ip = in_dhcp_data->ciaddr;
650 while (in_dhcp_ptr < end) {
651 const struct dhcp_opt_header *in_dhcp_opt =
652 (const struct dhcp_opt_header *)in_dhcp_ptr;
653 if (in_dhcp_opt->code == DHCP_OPT_END) {
654 break;
655 }
656 if (in_dhcp_opt->code == DHCP_OPT_PAD) {
657 in_dhcp_ptr += 1;
658 continue;
659 }
660 in_dhcp_ptr += sizeof *in_dhcp_opt;
661 if (in_dhcp_ptr > end) {
662 break;
663 }
664 in_dhcp_ptr += in_dhcp_opt->len;
665 if (in_dhcp_ptr > end) {
666 break;
667 }
668
669 switch (in_dhcp_opt->code) {
670 case DHCP_OPT_MSG_TYPE:
671 if (in_dhcp_opt->len == 1) {
672 in_dhcp_msg_type = DHCP_OPT_PAYLOAD(in_dhcp_opt);
673 }
674 break;
675 case DHCP_OPT_REQ_IP:
676 if (in_dhcp_opt->len == 4) {
677 request_ip = get_unaligned_be32(DHCP_OPT_PAYLOAD(in_dhcp_opt));
678 }
679 break;
680 default:
681 break;
682 }
683 }
684
685 /* Check that the DHCP Message Type (opt 53) is present or not with
686 * valid values - DHCP_MSG_DISCOVER or DHCP_MSG_REQUEST.
687 */
688 if (!in_dhcp_msg_type) {
689 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
690 VLOG_WARN_RL(&rl, "Missing DHCP message type");
691 goto exit;
692 }
693 if (*in_dhcp_msg_type != DHCP_MSG_DISCOVER &&
694 *in_dhcp_msg_type != DHCP_MSG_REQUEST) {
695 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
696 VLOG_WARN_RL(&rl, "Invalid DHCP message type: %d", *in_dhcp_msg_type);
697 goto exit;
698 }
699
700 uint8_t msg_type;
701 if (*in_dhcp_msg_type == DHCP_MSG_DISCOVER) {
702 msg_type = DHCP_MSG_OFFER;
703 } else {
704 /* This is a DHCPREQUEST. If the client has requested an IP that
705 * does not match the offered IP address, reply with a NAK. The
706 * requested IP address may be supplied either via Requested IP Address
707 * (opt 50) or via ciaddr, depending on the client's state.
708 */
709 msg_type = DHCP_MSG_ACK;
710 if (request_ip != *offer_ip) {
711 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
712 VLOG_WARN_RL(&rl, "DHCPREQUEST requested IP "IP_FMT" does not "
713 "match offer "IP_FMT, IP_ARGS(request_ip),
714 IP_ARGS(*offer_ip));
715 msg_type = DHCP_MSG_NAK;
716 }
717 }
718
719 /* Frame the DHCP reply packet
720 * Total DHCP options length will be options stored in the userdata +
721 * 16 bytes. Note that the DHCP options stored in userdata are not included
722 * in DHCPNAK messages.
723 *
724 * --------------------------------------------------------------
725 *| 4 Bytes (dhcp cookie) | 3 Bytes (option type) | DHCP options |
726 * --------------------------------------------------------------
727 *| 4 Bytes padding | 1 Byte (option end 0xFF ) | 4 Bytes padding|
728 * --------------------------------------------------------------
729 */
730 uint16_t new_l4_size = UDP_HEADER_LEN + DHCP_HEADER_LEN + 16;
731 if (msg_type != DHCP_MSG_NAK) {
732 new_l4_size += userdata->size;
733 }
734 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
735
736 struct dp_packet pkt_out;
737 dp_packet_init(&pkt_out, new_packet_size);
738 dp_packet_clear(&pkt_out);
739 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
740 pkt_out_ptr = &pkt_out;
741
742 /* Copy the L2 and L3 headers from the pkt_in as they would remain same*/
743 dp_packet_put(
744 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
745
746 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
747 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
748 pkt_out.l3_ofs = pkt_in->l3_ofs;
749 pkt_out.l4_ofs = pkt_in->l4_ofs;
750
751 struct udp_header *udp = dp_packet_put(
752 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
753
754 struct dhcp_header *dhcp_data = dp_packet_put(
755 &pkt_out, dp_packet_pull(pkt_in, DHCP_HEADER_LEN), DHCP_HEADER_LEN);
756 dhcp_data->op = DHCP_OP_REPLY;
757 dhcp_data->yiaddr = (msg_type == DHCP_MSG_NAK) ? 0 : *offer_ip;
758 dp_packet_put(&pkt_out, &magic_cookie, sizeof(ovs_be32));
759
760 uint16_t out_dhcp_opts_size = 12;
761 if (msg_type != DHCP_MSG_NAK) {
762 out_dhcp_opts_size += userdata->size;
763 }
764 uint8_t *out_dhcp_opts = dp_packet_put_zeros(&pkt_out,
765 out_dhcp_opts_size);
766 /* DHCP option - type */
767 out_dhcp_opts[0] = DHCP_OPT_MSG_TYPE;
768 out_dhcp_opts[1] = 1;
769 out_dhcp_opts[2] = msg_type;
770 out_dhcp_opts += 3;
771
772 if (msg_type != DHCP_MSG_NAK) {
773 memcpy(out_dhcp_opts, userdata->data, userdata->size);
774 out_dhcp_opts += userdata->size;
775 }
776
777 /* Padding */
778 out_dhcp_opts += 4;
779 /* End */
780 out_dhcp_opts[0] = DHCP_OPT_END;
781
782 udp->udp_len = htons(new_l4_size);
783
784 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
785 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs + new_l4_size);
786 udp->udp_csum = 0;
787 /* Checksum needs to be initialized to zero. */
788 out_ip->ip_csum = 0;
789 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
790
791 pin->packet = dp_packet_data(&pkt_out);
792 pin->packet_len = dp_packet_size(&pkt_out);
793
794 /* Log the response. */
795 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 40);
796 const struct eth_header *l2 = dp_packet_eth(&pkt_out);
797 VLOG_INFO_RL(&rl, "DHCP%s "ETH_ADDR_FMT" "IP_FMT"",
798 msg_type == DHCP_MSG_OFFER ? "OFFER" :
799 (msg_type == DHCP_MSG_ACK ? "ACK": "NAK"),
800 ETH_ADDR_ARGS(l2->eth_src), IP_ARGS(*offer_ip));
801
802 success = 1;
803 exit:
804 if (!ofperr) {
805 union mf_subvalue sv;
806 sv.u8_val = success;
807 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
808 }
809 queue_msg(ofputil_encode_resume(pin, continuation, proto));
810 if (pkt_out_ptr) {
811 dp_packet_uninit(pkt_out_ptr);
812 }
813 }
814
815 static bool
816 compose_out_dhcpv6_opts(struct ofpbuf *userdata,
817 struct ofpbuf *out_dhcpv6_opts, ovs_be32 iaid)
818 {
819 while (userdata->size) {
820 struct dhcp_opt6_header *userdata_opt = ofpbuf_try_pull(
821 userdata, sizeof *userdata_opt);
822 if (!userdata_opt) {
823 return false;
824 }
825
826 size_t size = ntohs(userdata_opt->size);
827 uint8_t *userdata_opt_data = ofpbuf_try_pull(userdata, size);
828 if (!userdata_opt_data) {
829 return false;
830 }
831
832 switch (ntohs(userdata_opt->opt_code)) {
833 case DHCPV6_OPT_SERVER_ID_CODE:
834 {
835 /* The Server Identifier option carries a DUID
836 * identifying a server between a client and a server.
837 * See RFC 3315 Sec 9 and Sec 22.3.
838 *
839 * We use DUID Based on Link-layer Address [DUID-LL].
840 */
841
842 struct dhcpv6_opt_server_id *opt_server_id = ofpbuf_put_zeros(
843 out_dhcpv6_opts, sizeof *opt_server_id);
844
845 opt_server_id->opt.code = htons(DHCPV6_OPT_SERVER_ID_CODE);
846 opt_server_id->opt.len = htons(size + 4);
847 opt_server_id->duid_type = htons(DHCPV6_DUID_LL);
848 opt_server_id->hw_type = htons(DHCPV6_HW_TYPE_ETH);
849 memcpy(&opt_server_id->mac, userdata_opt_data,
850 sizeof(struct eth_addr));
851 break;
852 }
853
854 case DHCPV6_OPT_IA_ADDR_CODE:
855 {
856 if (size != sizeof(struct in6_addr)) {
857 return false;
858 }
859
860 if (!iaid) {
861 /* If iaid is None, it means its an DHCPv6 information request.
862 * Don't put IA_NA option in the response. */
863 break;
864 }
865 /* IA Address option is used to specify IPv6 addresses associated
866 * with an IA_NA or IA_TA. The IA Address option must be
867 * encapsulated in the Options field of an IA_NA or IA_TA option.
868 *
869 * We will encapsulate the IA Address within the IA_NA option.
870 * Please see RFC 3315 section 22.5 and 22.6
871 */
872 struct dhcpv6_opt_ia_na *opt_ia_na = ofpbuf_put_zeros(
873 out_dhcpv6_opts, sizeof *opt_ia_na);
874 opt_ia_na->opt.code = htons(DHCPV6_OPT_IA_NA_CODE);
875 /* IA_NA length (in bytes)-
876 * IAID - 4
877 * T1 - 4
878 * T2 - 4
879 * IA Address - sizeof(struct dhcpv6_opt_ia_addr)
880 */
881 opt_ia_na->opt.len = htons(12 + sizeof(struct dhcpv6_opt_ia_addr));
882 opt_ia_na->iaid = iaid;
883 /* Set the lifetime of the address(es) to infinity */
884 opt_ia_na->t1 = OVS_BE32_MAX;
885 opt_ia_na->t2 = OVS_BE32_MAX;
886
887 struct dhcpv6_opt_ia_addr *opt_ia_addr = ofpbuf_put_zeros(
888 out_dhcpv6_opts, sizeof *opt_ia_addr);
889 opt_ia_addr->opt.code = htons(DHCPV6_OPT_IA_ADDR_CODE);
890 opt_ia_addr->opt.len = htons(size + 8);
891 memcpy(opt_ia_addr->ipv6.s6_addr, userdata_opt_data, size);
892 opt_ia_addr->t1 = OVS_BE32_MAX;
893 opt_ia_addr->t2 = OVS_BE32_MAX;
894 break;
895 }
896
897 case DHCPV6_OPT_DNS_SERVER_CODE:
898 {
899 struct dhcpv6_opt_header *opt_dns = ofpbuf_put_zeros(
900 out_dhcpv6_opts, sizeof *opt_dns);
901 opt_dns->code = htons(DHCPV6_OPT_DNS_SERVER_CODE);
902 opt_dns->len = htons(size);
903 ofpbuf_put(out_dhcpv6_opts, userdata_opt_data, size);
904 break;
905 }
906
907 case DHCPV6_OPT_DOMAIN_SEARCH_CODE:
908 {
909 struct dhcpv6_opt_header *opt_dsl = ofpbuf_put_zeros(
910 out_dhcpv6_opts, sizeof *opt_dsl);
911 opt_dsl->code = htons(DHCPV6_OPT_DOMAIN_SEARCH_CODE);
912 opt_dsl->len = htons(size + 2);
913 uint8_t *data = ofpbuf_put_zeros(out_dhcpv6_opts, size + 2);
914 *data = size;
915 memcpy(data + 1, userdata_opt_data, size);
916 break;
917 }
918
919 default:
920 return false;
921 }
922 }
923 return true;
924 }
925
926 static void
927 pinctrl_handle_put_dhcpv6_opts(
928 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
929 struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
930 {
931 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
932 enum ofp_version version = rconn_get_version(swconn);
933 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
934 struct dp_packet *pkt_out_ptr = NULL;
935 uint32_t success = 0;
936
937 /* Parse result field. */
938 const struct mf_field *f;
939 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
940 if (ofperr) {
941 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
942 goto exit;
943 }
944
945 /* Parse result offset. */
946 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
947 if (!ofsp) {
948 VLOG_WARN_RL(&rl, "offset not present in the userdata");
949 goto exit;
950 }
951
952 /* Check that the result is valid and writable. */
953 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
954 ofperr = mf_check_dst(&dst, NULL);
955 if (ofperr) {
956 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
957 goto exit;
958 }
959
960 if (!userdata->size) {
961 VLOG_WARN_RL(&rl, "DHCPv6 options not present in the userdata");
962 goto exit;
963 }
964
965 struct udp_header *in_udp = dp_packet_l4(pkt_in);
966 const uint8_t *in_dhcpv6_data = dp_packet_get_udp_payload(pkt_in);
967 if (!in_udp || !in_dhcpv6_data) {
968 VLOG_WARN_RL(&rl, "truncated dhcpv6 packet");
969 goto exit;
970 }
971
972 uint8_t out_dhcpv6_msg_type;
973 uint8_t in_dhcpv6_msg_type = *in_dhcpv6_data;
974 switch (in_dhcpv6_msg_type) {
975 case DHCPV6_MSG_TYPE_SOLICIT:
976 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_ADVT;
977 break;
978
979 case DHCPV6_MSG_TYPE_REQUEST:
980 case DHCPV6_MSG_TYPE_CONFIRM:
981 case DHCPV6_MSG_TYPE_DECLINE:
982 case DHCPV6_MSG_TYPE_INFO_REQ:
983 out_dhcpv6_msg_type = DHCPV6_MSG_TYPE_REPLY;
984 break;
985
986 default:
987 /* Invalid or unsupported DHCPv6 message type */
988 goto exit;
989 }
990
991 /* Skip 4 bytes (message type (1 byte) + transaction ID (3 bytes). */
992 in_dhcpv6_data += 4;
993 /* We need to extract IAID from the IA-NA option of the client's DHCPv6
994 * solicit/request/confirm packet and copy the same IAID in the Server's
995 * response.
996 * DHCPv6 information packet (for stateless request will not have IA-NA
997 * option. So we don't need to copy that in the Server's response.
998 * */
999 ovs_be32 iaid = 0;
1000 struct dhcpv6_opt_header const *in_opt_client_id = NULL;
1001 size_t udp_len = ntohs(in_udp->udp_len);
1002 size_t l4_len = dp_packet_l4_size(pkt_in);
1003 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
1004 while (in_dhcpv6_data < end) {
1005 struct dhcpv6_opt_header const *in_opt =
1006 (struct dhcpv6_opt_header *)in_dhcpv6_data;
1007 switch(ntohs(in_opt->code)) {
1008 case DHCPV6_OPT_IA_NA_CODE:
1009 {
1010 struct dhcpv6_opt_ia_na *opt_ia_na = (
1011 struct dhcpv6_opt_ia_na *)in_opt;
1012 iaid = opt_ia_na->iaid;
1013 break;
1014 }
1015
1016 case DHCPV6_OPT_CLIENT_ID_CODE:
1017 in_opt_client_id = in_opt;
1018 break;
1019
1020 default:
1021 break;
1022 }
1023 in_dhcpv6_data += sizeof *in_opt + ntohs(in_opt->len);
1024 }
1025
1026 if (!in_opt_client_id) {
1027 VLOG_WARN_RL(&rl, "DHCPv6 option - Client id not present in the "
1028 "DHCPv6 packet");
1029 goto exit;
1030 }
1031
1032 if (!iaid && in_dhcpv6_msg_type != DHCPV6_MSG_TYPE_INFO_REQ) {
1033 VLOG_WARN_RL(&rl, "DHCPv6 option - IA NA not present in the "
1034 "DHCPv6 packet");
1035 goto exit;
1036 }
1037
1038 uint64_t out_ofpacts_dhcpv6_opts_stub[256 / 8];
1039 struct ofpbuf out_dhcpv6_opts =
1040 OFPBUF_STUB_INITIALIZER(out_ofpacts_dhcpv6_opts_stub);
1041
1042 if (!compose_out_dhcpv6_opts(userdata, &out_dhcpv6_opts, iaid)) {
1043 VLOG_WARN_RL(&rl, "Invalid userdata");
1044 goto exit;
1045 }
1046
1047 uint16_t new_l4_size
1048 = (UDP_HEADER_LEN + 4 + sizeof *in_opt_client_id +
1049 ntohs(in_opt_client_id->len) + out_dhcpv6_opts.size);
1050 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
1051
1052 struct dp_packet pkt_out;
1053 dp_packet_init(&pkt_out, new_packet_size);
1054 dp_packet_clear(&pkt_out);
1055 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
1056 pkt_out_ptr = &pkt_out;
1057
1058 /* Copy L2 and L3 headers from pkt_in. */
1059 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
1060 pkt_in->l4_ofs);
1061
1062 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
1063 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
1064 pkt_out.l3_ofs = pkt_in->l3_ofs;
1065 pkt_out.l4_ofs = pkt_in->l4_ofs;
1066
1067 /* Pull the DHCPv6 message type and transaction id from the pkt_in.
1068 * Need to preserve the transaction id in the DHCPv6 reply packet. */
1069 struct udp_header *out_udp = dp_packet_put(
1070 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
1071 uint8_t *out_dhcpv6 = dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, 4), 4);
1072
1073 /* Set the proper DHCPv6 message type. */
1074 *out_dhcpv6 = out_dhcpv6_msg_type;
1075
1076 /* Copy the Client Identifier. */
1077 dp_packet_put(&pkt_out, in_opt_client_id,
1078 sizeof *in_opt_client_id + ntohs(in_opt_client_id->len));
1079
1080 /* Copy the DHCPv6 Options. */
1081 dp_packet_put(&pkt_out, out_dhcpv6_opts.data, out_dhcpv6_opts.size);
1082 out_udp->udp_len = htons(new_l4_size);
1083 out_udp->udp_csum = 0;
1084
1085 struct ovs_16aligned_ip6_hdr *out_ip6 = dp_packet_l3(&pkt_out);
1086 out_ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = out_udp->udp_len;
1087
1088 uint32_t csum;
1089 csum = packet_csum_pseudoheader6(dp_packet_l3(&pkt_out));
1090 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
1091 ((const unsigned char *)out_udp -
1092 (const unsigned char *)dp_packet_eth(&pkt_out)));
1093 out_udp->udp_csum = csum_finish(csum);
1094 if (!out_udp->udp_csum) {
1095 out_udp->udp_csum = htons(0xffff);
1096 }
1097
1098 pin->packet = dp_packet_data(&pkt_out);
1099 pin->packet_len = dp_packet_size(&pkt_out);
1100 ofpbuf_uninit(&out_dhcpv6_opts);
1101 success = 1;
1102 exit:
1103 if (!ofperr) {
1104 union mf_subvalue sv;
1105 sv.u8_val = success;
1106 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
1107 }
1108 queue_msg(ofputil_encode_resume(pin, continuation, proto));
1109 dp_packet_uninit(pkt_out_ptr);
1110 }
1111
1112 static void
1113 put_be16(struct ofpbuf *buf, ovs_be16 x)
1114 {
1115 ofpbuf_put(buf, &x, sizeof x);
1116 }
1117
1118 static void
1119 put_be32(struct ofpbuf *buf, ovs_be32 x)
1120 {
1121 ofpbuf_put(buf, &x, sizeof x);
1122 }
1123
1124 static void
1125 pinctrl_handle_dns_lookup(
1126 const struct sbrec_dns_table *dns_table,
1127 struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
1128 struct ofpbuf *userdata, struct ofpbuf *continuation)
1129 {
1130 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1131 enum ofp_version version = rconn_get_version(swconn);
1132 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1133 struct dp_packet *pkt_out_ptr = NULL;
1134 uint32_t success = 0;
1135
1136 /* Parse result field. */
1137 const struct mf_field *f;
1138 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
1139 if (ofperr) {
1140 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
1141 goto exit;
1142 }
1143
1144 /* Parse result offset. */
1145 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
1146 if (!ofsp) {
1147 VLOG_WARN_RL(&rl, "offset not present in the userdata");
1148 goto exit;
1149 }
1150
1151 /* Check that the result is valid and writable. */
1152 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
1153 ofperr = mf_check_dst(&dst, NULL);
1154 if (ofperr) {
1155 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
1156 goto exit;
1157 }
1158
1159 /* Extract the DNS header */
1160 struct dns_header const *in_dns_header = dp_packet_get_udp_payload(pkt_in);
1161 if (!in_dns_header) {
1162 VLOG_WARN_RL(&rl, "truncated dns packet");
1163 goto exit;
1164 }
1165
1166 /* Check if it is DNS request or not */
1167 if (in_dns_header->lo_flag & 0x80) {
1168 /* It's a DNS response packet which we are not interested in */
1169 goto exit;
1170 }
1171
1172 /* Check if at least one query request is present */
1173 if (!in_dns_header->qdcount) {
1174 goto exit;
1175 }
1176
1177 struct udp_header *in_udp = dp_packet_l4(pkt_in);
1178 size_t udp_len = ntohs(in_udp->udp_len);
1179 size_t l4_len = dp_packet_l4_size(pkt_in);
1180 uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len);
1181 uint8_t *in_dns_data = (uint8_t *)(in_dns_header + 1);
1182 uint8_t *in_queryname = in_dns_data;
1183 uint8_t idx = 0;
1184 struct ds query_name;
1185 ds_init(&query_name);
1186 /* Extract the query_name. If the query name is - 'www.ovn.org' it would be
1187 * encoded as (in hex) - 03 77 77 77 03 6f 76 63 03 6f 72 67 00.
1188 */
1189 while ((in_dns_data + idx) < end && in_dns_data[idx]) {
1190 uint8_t label_len = in_dns_data[idx++];
1191 if (in_dns_data + idx + label_len > end) {
1192 ds_destroy(&query_name);
1193 goto exit;
1194 }
1195 ds_put_buffer(&query_name, (const char *) in_dns_data + idx, label_len);
1196 idx += label_len;
1197 ds_put_char(&query_name, '.');
1198 }
1199
1200 idx++;
1201 ds_chomp(&query_name, '.');
1202 in_dns_data += idx;
1203
1204 /* Query should have TYPE and CLASS fields */
1205 if (in_dns_data + (2 * sizeof(ovs_be16)) > end) {
1206 ds_destroy(&query_name);
1207 goto exit;
1208 }
1209
1210 uint16_t query_type = ntohs(*ALIGNED_CAST(const ovs_be16 *, in_dns_data));
1211 /* Supported query types - A, AAAA and ANY */
1212 if (!(query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_AAAA
1213 || query_type == DNS_QUERY_TYPE_ANY)) {
1214 ds_destroy(&query_name);
1215 goto exit;
1216 }
1217
1218 uint64_t dp_key = ntohll(pin->flow_metadata.flow.metadata);
1219 const struct sbrec_dns *sbrec_dns;
1220 const char *answer_ips = NULL;
1221 SBREC_DNS_TABLE_FOR_EACH (sbrec_dns, dns_table) {
1222 for (size_t i = 0; i < sbrec_dns->n_datapaths; i++) {
1223 if (sbrec_dns->datapaths[i]->tunnel_key == dp_key) {
1224 answer_ips = smap_get(&sbrec_dns->records,
1225 ds_cstr(&query_name));
1226 if (answer_ips) {
1227 break;
1228 }
1229 }
1230 }
1231
1232 if (answer_ips) {
1233 break;
1234 }
1235 }
1236
1237 ds_destroy(&query_name);
1238 if (!answer_ips) {
1239 goto exit;
1240 }
1241
1242 struct lport_addresses ip_addrs;
1243 if (!extract_ip_addresses(answer_ips, &ip_addrs)) {
1244 goto exit;
1245 }
1246
1247 uint16_t ancount = 0;
1248 uint64_t dns_ans_stub[128 / 8];
1249 struct ofpbuf dns_answer = OFPBUF_STUB_INITIALIZER(dns_ans_stub);
1250
1251 if (query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_ANY) {
1252 for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) {
1253 /* Copy the answer section */
1254 /* Format of the answer section is
1255 * - NAME -> The domain name
1256 * - TYPE -> 2 octets containing one of the RR type codes
1257 * - CLASS -> 2 octets which specify the class of the data
1258 * in the RDATA field.
1259 * - TTL -> 32 bit unsigned int specifying the time
1260 * interval (in secs) that the resource record
1261 * may be cached before it should be discarded.
1262 * - RDLENGTH -> 16 bit integer specifying the length of the
1263 * RDATA field.
1264 * - RDATA -> a variable length string of octets that
1265 * describes the resource. In our case it will
1266 * be IP address of the domain name.
1267 */
1268 ofpbuf_put(&dns_answer, in_queryname, idx);
1269 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_A));
1270 put_be16(&dns_answer, htons(DNS_CLASS_IN));
1271 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
1272 put_be16(&dns_answer, htons(sizeof(ovs_be32)));
1273 put_be32(&dns_answer, ip_addrs.ipv4_addrs[i].addr);
1274 ancount++;
1275 }
1276 }
1277
1278 if (query_type == DNS_QUERY_TYPE_AAAA ||
1279 query_type == DNS_QUERY_TYPE_ANY) {
1280 for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) {
1281 ofpbuf_put(&dns_answer, in_queryname, idx);
1282 put_be16(&dns_answer, htons(DNS_QUERY_TYPE_AAAA));
1283 put_be16(&dns_answer, htons(DNS_CLASS_IN));
1284 put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL));
1285 const struct in6_addr *ip6 = &ip_addrs.ipv6_addrs[i].addr;
1286 put_be16(&dns_answer, htons(sizeof *ip6));
1287 ofpbuf_put(&dns_answer, ip6, sizeof *ip6);
1288 ancount++;
1289 }
1290 }
1291
1292 destroy_lport_addresses(&ip_addrs);
1293
1294 if (!ancount) {
1295 ofpbuf_uninit(&dns_answer);
1296 goto exit;
1297 }
1298
1299 uint16_t new_l4_size = ntohs(in_udp->udp_len) + dns_answer.size;
1300 size_t new_packet_size = pkt_in->l4_ofs + new_l4_size;
1301 struct dp_packet pkt_out;
1302 dp_packet_init(&pkt_out, new_packet_size);
1303 dp_packet_clear(&pkt_out);
1304 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
1305 pkt_out_ptr = &pkt_out;
1306
1307 /* Copy the L2 and L3 headers from the pkt_in as they would remain same.*/
1308 dp_packet_put(
1309 &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs);
1310
1311 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
1312 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
1313 pkt_out.l3_ofs = pkt_in->l3_ofs;
1314 pkt_out.l4_ofs = pkt_in->l4_ofs;
1315
1316 struct udp_header *out_udp = dp_packet_put(
1317 &pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN);
1318
1319 /* Copy the DNS header. */
1320 struct dns_header *out_dns_header = dp_packet_put(
1321 &pkt_out, dp_packet_pull(pkt_in, sizeof *out_dns_header),
1322 sizeof *out_dns_header);
1323
1324 /* Set the response bit to 1 in the flags. */
1325 out_dns_header->lo_flag |= 0x80;
1326
1327 /* Set the answer RR. */
1328 out_dns_header->ancount = htons(ancount);
1329
1330 /* Copy the Query section. */
1331 dp_packet_put(&pkt_out, dp_packet_data(pkt_in), dp_packet_size(pkt_in));
1332
1333 /* Copy the answer sections. */
1334 dp_packet_put(&pkt_out, dns_answer.data, dns_answer.size);
1335 ofpbuf_uninit(&dns_answer);
1336
1337 out_udp->udp_len = htons(new_l4_size);
1338 out_udp->udp_csum = 0;
1339
1340 struct eth_header *eth = dp_packet_data(&pkt_out);
1341 if (eth->eth_type == htons(ETH_TYPE_IP)) {
1342 struct ip_header *out_ip = dp_packet_l3(&pkt_out);
1343 out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs
1344 + new_l4_size);
1345 /* Checksum needs to be initialized to zero. */
1346 out_ip->ip_csum = 0;
1347 out_ip->ip_csum = csum(out_ip, sizeof *out_ip);
1348 } else {
1349 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
1350 nh->ip6_plen = htons(new_l4_size);
1351
1352 /* IPv6 needs UDP checksum calculated */
1353 uint32_t csum;
1354 csum = packet_csum_pseudoheader6(nh);
1355 csum = csum_continue(csum, out_udp, dp_packet_size(&pkt_out) -
1356 ((const unsigned char *)out_udp -
1357 (const unsigned char *)eth));
1358 out_udp->udp_csum = csum_finish(csum);
1359 if (!out_udp->udp_csum) {
1360 out_udp->udp_csum = htons(0xffff);
1361 }
1362 }
1363
1364 pin->packet = dp_packet_data(&pkt_out);
1365 pin->packet_len = dp_packet_size(&pkt_out);
1366
1367 success = 1;
1368 exit:
1369 if (!ofperr) {
1370 union mf_subvalue sv;
1371 sv.u8_val = success;
1372 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
1373 }
1374 queue_msg(ofputil_encode_resume(pin, continuation, proto));
1375 dp_packet_uninit(pkt_out_ptr);
1376 }
1377
1378 static void
1379 process_packet_in(const struct ofp_header *msg,
1380 const struct sbrec_dns_table *dns_table)
1381 {
1382 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1383
1384 struct ofputil_packet_in pin;
1385 struct ofpbuf continuation;
1386 enum ofperr error = ofputil_decode_packet_in(msg, true, NULL, NULL, &pin,
1387 NULL, NULL, &continuation);
1388
1389 if (error) {
1390 VLOG_WARN_RL(&rl, "error decoding packet-in: %s",
1391 ofperr_to_string(error));
1392 return;
1393 }
1394 if (pin.reason != OFPR_ACTION) {
1395 return;
1396 }
1397
1398 struct ofpbuf userdata = ofpbuf_const_initializer(pin.userdata,
1399 pin.userdata_len);
1400 const struct action_header *ah = ofpbuf_pull(&userdata, sizeof *ah);
1401 if (!ah) {
1402 VLOG_WARN_RL(&rl, "packet-in userdata lacks action header");
1403 return;
1404 }
1405
1406 struct dp_packet packet;
1407 dp_packet_use_const(&packet, pin.packet, pin.packet_len);
1408 struct flow headers;
1409 flow_extract(&packet, &headers);
1410
1411 switch (ntohl(ah->opcode)) {
1412 case ACTION_OPCODE_ARP:
1413 pinctrl_handle_arp(&headers, &packet, &pin.flow_metadata, &userdata);
1414 break;
1415
1416 case ACTION_OPCODE_PUT_ARP:
1417 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1418 true);
1419 break;
1420
1421 case ACTION_OPCODE_PUT_DHCP_OPTS:
1422 pinctrl_handle_put_dhcp_opts(&packet, &pin, &userdata, &continuation);
1423 break;
1424
1425 case ACTION_OPCODE_ND_NA:
1426 pinctrl_handle_nd_na(&headers, &pin.flow_metadata, &userdata, false);
1427 break;
1428
1429 case ACTION_OPCODE_ND_NA_ROUTER:
1430 pinctrl_handle_nd_na(&headers, &pin.flow_metadata, &userdata, true);
1431 break;
1432
1433 case ACTION_OPCODE_PUT_ND:
1434 pinctrl_handle_put_mac_binding(&pin.flow_metadata.flow, &headers,
1435 false);
1436 break;
1437
1438 case ACTION_OPCODE_PUT_DHCPV6_OPTS:
1439 pinctrl_handle_put_dhcpv6_opts(&packet, &pin, &userdata,
1440 &continuation);
1441 break;
1442
1443 case ACTION_OPCODE_DNS_LOOKUP:
1444 pinctrl_handle_dns_lookup(dns_table,
1445 &packet, &pin, &userdata, &continuation);
1446 break;
1447
1448 case ACTION_OPCODE_LOG:
1449 handle_acl_log(&headers, &userdata);
1450 break;
1451
1452 case ACTION_OPCODE_PUT_ND_RA_OPTS:
1453 pinctrl_handle_put_nd_ra_opts(&headers, &packet, &pin, &userdata,
1454 &continuation);
1455 break;
1456
1457 case ACTION_OPCODE_ND_NS:
1458 pinctrl_handle_nd_ns(&headers, &packet, &pin.flow_metadata,
1459 &userdata);
1460 break;
1461
1462 case ACTION_OPCODE_ICMP:
1463 pinctrl_handle_icmp(&headers, &packet, &pin.flow_metadata,
1464 &userdata);
1465 break;
1466
1467 case ACTION_OPCODE_TCP_RESET:
1468 pinctrl_handle_tcp_reset(&headers, &packet, &pin.flow_metadata,
1469 &userdata);
1470 break;
1471
1472 default:
1473 VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
1474 ntohl(ah->opcode));
1475 break;
1476 }
1477 }
1478
1479 static void
1480 pinctrl_recv(const struct sbrec_dns_table *dns_table,
1481 const struct ofp_header *oh, enum ofptype type)
1482 {
1483 if (type == OFPTYPE_ECHO_REQUEST) {
1484 queue_msg(ofputil_encode_echo_reply(oh));
1485 } else if (type == OFPTYPE_GET_CONFIG_REPLY) {
1486 /* Enable asynchronous messages */
1487 struct ofputil_switch_config config;
1488
1489 ofputil_decode_get_config_reply(oh, &config);
1490 config.miss_send_len = UINT16_MAX;
1491 set_switch_config(swconn, &config);
1492 } else if (type == OFPTYPE_PACKET_IN) {
1493 process_packet_in(oh, dns_table);
1494 } else {
1495 if (VLOG_IS_DBG_ENABLED()) {
1496 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
1497
1498 char *s = ofp_to_string(oh, ntohs(oh->length), NULL, NULL, 2);
1499
1500 VLOG_DBG_RL(&rl, "OpenFlow packet ignored: %s", s);
1501 free(s);
1502 }
1503 }
1504 }
1505
1506 void
1507 pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
1508 struct ovsdb_idl_index *sbrec_chassis_by_name,
1509 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
1510 struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
1511 struct ovsdb_idl_index *sbrec_port_binding_by_key,
1512 struct ovsdb_idl_index *sbrec_port_binding_by_name,
1513 struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
1514 const struct sbrec_dns_table *dns_table,
1515 const struct ovsrec_bridge *br_int,
1516 const struct sbrec_chassis *chassis,
1517 const struct hmap *local_datapaths,
1518 const struct sset *active_tunnels)
1519 {
1520 char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
1521 if (strcmp(target, rconn_get_target(swconn))) {
1522 VLOG_INFO("%s: connecting to switch", target);
1523 rconn_connect(swconn, target, target);
1524 }
1525 free(target);
1526
1527 rconn_run(swconn);
1528
1529 if (!rconn_is_connected(swconn)) {
1530 return;
1531 }
1532
1533 if (conn_seq_no != rconn_get_connection_seqno(swconn)) {
1534 pinctrl_setup();
1535 conn_seq_no = rconn_get_connection_seqno(swconn);
1536 flush_put_mac_bindings();
1537 }
1538
1539 /* Process a limited number of messages per call. */
1540 for (int i = 0; i < 50; i++) {
1541 struct ofpbuf *msg = rconn_recv(swconn);
1542 if (!msg) {
1543 break;
1544 }
1545
1546 const struct ofp_header *oh = msg->data;
1547 enum ofptype type;
1548
1549 ofptype_decode(&type, oh);
1550 pinctrl_recv(dns_table, oh, type);
1551 ofpbuf_delete(msg);
1552 }
1553
1554 run_put_mac_bindings(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
1555 sbrec_port_binding_by_key,
1556 sbrec_mac_binding_by_lport_ip);
1557 send_garp_run(sbrec_chassis_by_name, sbrec_port_binding_by_datapath,
1558 sbrec_port_binding_by_name, br_int, chassis,
1559 local_datapaths, active_tunnels);
1560 send_ipv6_ras(sbrec_port_binding_by_datapath,
1561 sbrec_port_binding_by_name, local_datapaths);
1562 buffered_packets_map_gc();
1563 }
1564
1565 /* Table of ipv6_ra_state structures, keyed on logical port name */
1566 static struct shash ipv6_ras;
1567
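/* Absolute time, in milliseconds on the time_msec() clock, at which the
 * next periodic IPv6 RA is due.  LLONG_MAX when nothing is scheduled. */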
1569 static long long int send_ipv6_ra_time;
1570
1571 struct ipv6_ra_config {
1572 time_t min_interval;
1573 time_t max_interval;
1574 struct eth_addr eth_src;
1575 struct eth_addr eth_dst;
1576 struct in6_addr ipv6_src;
1577 struct in6_addr ipv6_dst;
1578 int32_t mtu;
1579 uint8_t mo_flags; /* Managed/Other flags for RAs */
1580 uint8_t la_flags; /* On-link/autonomous flags for address prefixes */
1581 struct lport_addresses prefixes;
1582 };
1583
/* Run-time state of periodic RAs for one logical port; the values of the
 * 'ipv6_ras' table. */
struct ipv6_ra_state {
    long long int next_announce;    /* Time of the next RA, in msec. */
    struct ipv6_ra_config *config;  /* Owned by this structure. */
    int64_t port_key;               /* Tunnel key of the peer port. */
    int64_t metadata;               /* Tunnel key of the peer's datapath. */
    bool delete_me;                 /* Marks entries to drop on next sweep. */
};
1591
1592 static void
1593 init_ipv6_ras(void)
1594 {
1595 shash_init(&ipv6_ras);
1596 send_ipv6_ra_time = LLONG_MAX;
1597 }
1598
1599 static void
1600 ipv6_ra_config_delete(struct ipv6_ra_config *config)
1601 {
1602 if (config) {
1603 destroy_lport_addresses(&config->prefixes);
1604 free(config);
1605 }
1606 }
1607
1608 static void
1609 ipv6_ra_delete(struct ipv6_ra_state *ra)
1610 {
1611 if (ra) {
1612 ipv6_ra_config_delete(ra->config);
1613 free(ra);
1614 }
1615 }
1616
1617 static void
1618 destroy_ipv6_ras(void)
1619 {
1620 struct shash_node *iter, *next;
1621 SHASH_FOR_EACH_SAFE (iter, next, &ipv6_ras) {
1622 struct ipv6_ra_state *ra = iter->data;
1623 ipv6_ra_delete(ra);
1624 shash_delete(&ipv6_ras, iter);
1625 }
1626 shash_destroy(&ipv6_ras);
1627 }
1628
1629 static struct ipv6_ra_config *
1630 ipv6_ra_update_config(const struct sbrec_port_binding *pb)
1631 {
1632 struct ipv6_ra_config *config;
1633
1634 config = xzalloc(sizeof *config);
1635
1636 config->max_interval = smap_get_int(&pb->options, "ipv6_ra_max_interval",
1637 ND_RA_MAX_INTERVAL_DEFAULT);
1638 config->min_interval = smap_get_int(&pb->options, "ipv6_ra_min_interval",
1639 nd_ra_min_interval_default(config->max_interval));
1640 config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
1641 config->la_flags = ND_PREFIX_ON_LINK;
1642
1643 const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode");
1644 if (!address_mode) {
1645 VLOG_WARN("No address mode specified");
1646 goto fail;
1647 }
1648 if (!strcmp(address_mode, "dhcpv6_stateless")) {
1649 config->mo_flags = IPV6_ND_RA_FLAG_OTHER_ADDR_CONFIG;
1650 } else if (!strcmp(address_mode, "dhcpv6_stateful")) {
1651 config->mo_flags = IPV6_ND_RA_FLAG_MANAGED_ADDR_CONFIG;
1652 } else if (!strcmp(address_mode, "slaac")) {
1653 config->la_flags |= ND_PREFIX_AUTONOMOUS_ADDRESS;
1654 } else {
1655 VLOG_WARN("Invalid address mode %s", address_mode);
1656 goto fail;
1657 }
1658
1659 const char *prefixes = smap_get(&pb->options, "ipv6_ra_prefixes");
1660 if (prefixes && !extract_ip_addresses(prefixes, &config->prefixes)) {
1661 VLOG_WARN("Invalid IPv6 prefixes: %s", prefixes);
1662 goto fail;
1663 }
1664
1665 /* All nodes multicast addresses */
1666 config->eth_dst = (struct eth_addr) ETH_ADDR_C(33,33,00,00,00,01);
1667 ipv6_parse("ff02::1", &config->ipv6_dst);
1668
1669 const char *eth_addr = smap_get(&pb->options, "ipv6_ra_src_eth");
1670 if (!eth_addr || !eth_addr_from_string(eth_addr, &config->eth_src)) {
1671 VLOG_WARN("Invalid ethernet source %s", eth_addr);
1672 goto fail;
1673 }
1674 const char *ip_addr = smap_get(&pb->options, "ipv6_ra_src_addr");
1675 if (!ip_addr || !ipv6_parse(ip_addr, &config->ipv6_src)) {
1676 VLOG_WARN("Invalid IP source %s", ip_addr);
1677 goto fail;
1678 }
1679
1680 return config;
1681
1682 fail:
1683 ipv6_ra_config_delete(config);
1684 return NULL;
1685 }
1686
/* Returns the time, in milliseconds on the time_msec() clock, at which the
 * next RA should be sent: now plus a random delay of at least 'min_interval'
 * and less than 'max_interval' seconds.
 *
 * When 'max_interval' does not exceed 'min_interval' there is no range to
 * draw from and the delay is exactly 'min_interval' seconds.  random_range()
 * is not called in that case, since it cannot be given an empty range. */
static long long int
ipv6_ra_calc_next_announce(time_t min_interval, time_t max_interval)
{
    long long int min_interval_ms = min_interval * 1000LL;
    long long int max_interval_ms = max_interval * 1000LL;
    long long int next_announce = time_msec() + min_interval_ms;

    if (max_interval_ms > min_interval_ms) {
        next_announce += random_range(max_interval_ms - min_interval_ms);
    }
    return next_announce;
}
1696
1697 static void
1698 put_load(uint64_t value, enum mf_field_id dst, int ofs, int n_bits,
1699 struct ofpbuf *ofpacts)
1700 {
1701 struct ofpact_set_field *sf = ofpact_put_set_field(ofpacts,
1702 mf_from_id(dst), NULL,
1703 NULL);
1704 ovs_be64 n_value = htonll(value);
1705 bitwise_copy(&n_value, 8, 0, sf->value, sf->field->n_bytes, ofs, n_bits);
1706 bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
1707 }
1708
1709 static long long int
1710 ipv6_ra_send(struct ipv6_ra_state *ra)
1711 {
1712 if (time_msec() < ra->next_announce) {
1713 return ra->next_announce;
1714 }
1715
1716 uint64_t packet_stub[128 / 8];
1717 struct dp_packet packet;
1718 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
1719 compose_nd_ra(&packet, ra->config->eth_src, ra->config->eth_dst,
1720 &ra->config->ipv6_src, &ra->config->ipv6_dst,
1721 255, ra->config->mo_flags, htons(IPV6_ND_RA_LIFETIME), 0, 0,
1722 ra->config->mtu);
1723
1724 for (int i = 0; i < ra->config->prefixes.n_ipv6_addrs; i++) {
1725 ovs_be128 addr;
1726 memcpy(&addr, &ra->config->prefixes.ipv6_addrs[i].addr, sizeof addr);
1727 packet_put_ra_prefix_opt(&packet,
1728 ra->config->prefixes.ipv6_addrs[i].plen,
1729 ra->config->la_flags, htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME),
1730 htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr);
1731 }
1732
1733 uint64_t ofpacts_stub[4096 / 8];
1734 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
1735
1736 /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */
1737 uint32_t dp_key = ra->metadata;
1738 uint32_t port_key = ra->port_key;
1739 put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
1740 put_load(port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
1741 put_load(1, MFF_LOG_FLAGS, MLF_LOCAL_ONLY_BIT, 1, &ofpacts);
1742 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
1743 resubmit->in_port = OFPP_CONTROLLER;
1744 resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
1745
1746 struct ofputil_packet_out po = {
1747 .packet = dp_packet_data(&packet),
1748 .packet_len = dp_packet_size(&packet),
1749 .buffer_id = UINT32_MAX,
1750 .ofpacts = ofpacts.data,
1751 .ofpacts_len = ofpacts.size,
1752 };
1753
1754 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
1755 enum ofp_version version = rconn_get_version(swconn);
1756 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
1757 queue_msg(ofputil_encode_packet_out(&po, proto));
1758 dp_packet_uninit(&packet);
1759 ofpbuf_uninit(&ofpacts);
1760
1761 ra->next_announce = ipv6_ra_calc_next_announce(ra->config->min_interval,
1762 ra->config->max_interval);
1763
1764 return ra->next_announce;
1765 }
1766
1767 static void
1768 ipv6_ra_wait(void)
1769 {
1770 poll_timer_wait_until(send_ipv6_ra_time);
1771 }
1772
1773 static void
1774 send_ipv6_ras(struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
1775 struct ovsdb_idl_index *sbrec_port_binding_by_name,
1776 const struct hmap *local_datapaths)
1777 {
1778 struct shash_node *iter, *iter_next;
1779
1780 send_ipv6_ra_time = LLONG_MAX;
1781
1782 SHASH_FOR_EACH (iter, &ipv6_ras) {
1783 struct ipv6_ra_state *ra = iter->data;
1784 ra->delete_me = true;
1785 }
1786
1787 const struct local_datapath *ld;
1788 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
1789 struct sbrec_port_binding *target = sbrec_port_binding_index_init_row(
1790 sbrec_port_binding_by_datapath);
1791 sbrec_port_binding_index_set_datapath(target, ld->datapath);
1792
1793 struct sbrec_port_binding *pb;
1794 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
1795 sbrec_port_binding_by_datapath) {
1796 if (!smap_get_bool(&pb->options, "ipv6_ra_send_periodic", false)) {
1797 continue;
1798 }
1799
1800 const char *peer_s = smap_get(&pb->options, "peer");
1801 if (!peer_s) {
1802 continue;
1803 }
1804
1805 const struct sbrec_port_binding *peer
1806 = lport_lookup_by_name(sbrec_port_binding_by_name, peer_s);
1807 if (!peer) {
1808 continue;
1809 }
1810
1811 struct ipv6_ra_config *config = ipv6_ra_update_config(pb);
1812 if (!config) {
1813 continue;
1814 }
1815
1816 struct ipv6_ra_state *ra
1817 = shash_find_data(&ipv6_ras, pb->logical_port);
1818 if (!ra) {
1819 ra = xzalloc(sizeof *ra);
1820 ra->config = config;
1821 ra->next_announce = ipv6_ra_calc_next_announce(
1822 ra->config->min_interval,
1823 ra->config->max_interval);
1824 shash_add(&ipv6_ras, pb->logical_port, ra);
1825 } else {
1826 if (config->min_interval != ra->config->min_interval ||
1827 config->max_interval != ra->config->max_interval)
1828 ra->next_announce = ipv6_ra_calc_next_announce(
1829 config->min_interval,
1830 config->max_interval);
1831 ipv6_ra_config_delete(ra->config);
1832 ra->config = config;
1833 }
1834
1835 /* Peer is the logical switch port that the logical
1836 * router port is connected to. The RA is injected
1837 * into that logical switch port.
1838 */
1839 ra->port_key = peer->tunnel_key;
1840 ra->metadata = peer->datapath->tunnel_key;
1841 ra->delete_me = false;
1842
1843 long long int next_ra = ipv6_ra_send(ra);
1844 if (send_ipv6_ra_time > next_ra) {
1845 send_ipv6_ra_time = next_ra;
1846 }
1847 }
1848 sbrec_port_binding_index_destroy_row(target);
1849 }
1850
1851 /* Remove those that are no longer in the SB database */
1852 SHASH_FOR_EACH_SAFE (iter, iter_next, &ipv6_ras) {
1853 struct ipv6_ra_state *ra = iter->data;
1854 if (ra->delete_me) {
1855 shash_delete(&ipv6_ras, iter);
1856 ipv6_ra_delete(ra);
1857 }
1858 }
1859 }
1860
1861 void
1862 pinctrl_wait(struct ovsdb_idl_txn *ovnsb_idl_txn)
1863 {
1864 wait_put_mac_bindings(ovnsb_idl_txn);
1865 rconn_run_wait(swconn);
1866 rconn_recv_wait(swconn);
1867 send_garp_wait();
1868 ipv6_ra_wait();
1869 }
1870
1871 void
1872 pinctrl_destroy(void)
1873 {
1874 rconn_destroy(swconn);
1875 destroy_put_mac_bindings();
1876 destroy_send_garps();
1877 destroy_ipv6_ras();
1878 destroy_buffered_packets_map();
1879 }
1880 \f
1881 /* Implementation of the "put_arp" and "put_nd" OVN actions. These
1882 * actions send a packet to ovn-controller, using the flow as an API
1883 * (see actions.h for details). This code implements the actions by
1884 * updating the MAC_Binding table in the southbound database.
1885 *
1886 * This code could be a lot simpler if the database could always be updated,
1887 * but in fact we can only update it when 'ovnsb_idl_txn' is nonnull. Thus,
1888 * we buffer up a few put_mac_bindings (but we don't keep them longer
1889 * than 1 second) and apply them whenever a database transaction is
1890 * available. */
1891
1892 /* Buffered "put_mac_binding" operation. */
1893 struct put_mac_binding {
1894 struct hmap_node hmap_node; /* In 'put_mac_bindings'. */
1895
1896 long long int timestamp; /* In milliseconds. */
1897
1898 /* Key. */
1899 uint32_t dp_key;
1900 uint32_t port_key;
1901 struct in6_addr ip_key;
1902
1903 /* Value. */
1904 struct eth_addr mac;
1905 };
1906
1907 /* Contains "struct put_mac_binding"s. */
1908 static struct hmap put_mac_bindings;
1909
1910 static void
1911 init_put_mac_bindings(void)
1912 {
1913 hmap_init(&put_mac_bindings);
1914 }
1915
1916 static void
1917 destroy_put_mac_bindings(void)
1918 {
1919 flush_put_mac_bindings();
1920 hmap_destroy(&put_mac_bindings);
1921 }
1922
1923 static struct put_mac_binding *
1924 pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key,
1925 const struct in6_addr *ip_key, uint32_t hash)
1926 {
1927 struct put_mac_binding *pa;
1928 HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) {
1929 if (pa->dp_key == dp_key
1930 && pa->port_key == port_key
1931 && IN6_ARE_ADDR_EQUAL(&pa->ip_key, ip_key)) {
1932 return pa;
1933 }
1934 }
1935 return NULL;
1936 }
1937
1938 static void
1939 pinctrl_handle_put_mac_binding(const struct flow *md,
1940 const struct flow *headers, bool is_arp)
1941 {
1942 uint32_t dp_key = ntohll(md->metadata);
1943 uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0];
1944 struct buffered_packets *bp;
1945 struct in6_addr ip_key;
1946
1947 if (is_arp) {
1948 ip_key = in6_addr_mapped_ipv4(htonl(md->regs[0]));
1949 } else {
1950 ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0));
1951 memcpy(&ip_key, &ip6, sizeof ip_key);
1952 }
1953 uint32_t hash = hash_bytes(&ip_key, sizeof ip_key,
1954 hash_2words(dp_key, port_key));
1955 struct put_mac_binding *pmb
1956 = pinctrl_find_put_mac_binding(dp_key, port_key, &ip_key, hash);
1957 if (!pmb) {
1958 if (hmap_count(&put_mac_bindings) >= 1000) {
1959 COVERAGE_INC(pinctrl_drop_put_mac_binding);
1960 return;
1961 }
1962
1963 pmb = xmalloc(sizeof *pmb);
1964 hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash);
1965 pmb->dp_key = dp_key;
1966 pmb->port_key = port_key;
1967 pmb->ip_key = ip_key;
1968 }
1969 pmb->timestamp = time_msec();
1970 pmb->mac = headers->dl_src;
1971
1972 /* send queued pkts */
1973 uint32_t bhash = hash_bytes(&ip_key, sizeof ip_key, 0);
1974 bp = pinctrl_find_buffered_packets(&ip_key, bhash);
1975 if (bp) {
1976 buffered_send_packets(bp, &pmb->mac);
1977 }
1978 }
1979
/* Looks up, through the index 'sbrec_mac_binding_by_lport_ip', the
 * MAC_Binding row for 'logical_port' and 'ip'.
 *
 * Returns whatever sbrec_mac_binding_index_find() yields for that key. */
static const struct sbrec_mac_binding *
mac_binding_lookup(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
                   const char *logical_port,
                   const char *ip)
{
    /* Temporary index row that only carries the search key. */
    struct sbrec_mac_binding *key
        = sbrec_mac_binding_index_init_row(sbrec_mac_binding_by_lport_ip);
    sbrec_mac_binding_index_set_logical_port(key, logical_port);
    sbrec_mac_binding_index_set_ip(key, ip);

    const struct sbrec_mac_binding *found
        = sbrec_mac_binding_index_find(sbrec_mac_binding_by_lport_ip, key);

    sbrec_mac_binding_index_destroy_row(key);
    return found;
}
1998
1999 static void
2000 run_put_mac_binding(struct ovsdb_idl_txn *ovnsb_idl_txn,
2001 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
2002 struct ovsdb_idl_index *sbrec_port_binding_by_key,
2003 struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
2004 const struct put_mac_binding *pmb)
2005 {
2006 if (time_msec() > pmb->timestamp + 1000) {
2007 return;
2008 }
2009
2010 /* Convert logical datapath and logical port key into lport. */
2011 const struct sbrec_port_binding *pb = lport_lookup_by_key(
2012 sbrec_datapath_binding_by_key, sbrec_port_binding_by_key,
2013 pmb->dp_key, pmb->port_key);
2014 if (!pb) {
2015 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2016
2017 VLOG_WARN_RL(&rl, "unknown logical port with datapath %"PRIu32" "
2018 "and port %"PRIu32, pmb->dp_key, pmb->port_key);
2019 return;
2020 }
2021
2022 /* Convert ethernet argument to string form for database. */
2023 char mac_string[ETH_ADDR_STRLEN + 1];
2024 snprintf(mac_string, sizeof mac_string,
2025 ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac));
2026
2027 struct ds ip_s = DS_EMPTY_INITIALIZER;
2028 ipv6_format_mapped(&pmb->ip_key, &ip_s);
2029
2030 /* Update or add an IP-MAC binding for this logical port. */
2031 const struct sbrec_mac_binding *b =
2032 mac_binding_lookup(sbrec_mac_binding_by_lport_ip, pb->logical_port,
2033 ds_cstr(&ip_s));
2034 if (!b) {
2035 b = sbrec_mac_binding_insert(ovnsb_idl_txn);
2036 sbrec_mac_binding_set_logical_port(b, pb->logical_port);
2037 sbrec_mac_binding_set_ip(b, ds_cstr(&ip_s));
2038 sbrec_mac_binding_set_mac(b, mac_string);
2039 sbrec_mac_binding_set_datapath(b, pb->datapath);
2040 } else if (strcmp(b->mac, mac_string)) {
2041 sbrec_mac_binding_set_mac(b, mac_string);
2042 }
2043 ds_destroy(&ip_s);
2044 }
2045
2046 static void
2047 run_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn,
2048 struct ovsdb_idl_index *sbrec_datapath_binding_by_key,
2049 struct ovsdb_idl_index *sbrec_port_binding_by_key,
2050 struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip)
2051 {
2052 if (!ovnsb_idl_txn) {
2053 return;
2054 }
2055
2056 const struct put_mac_binding *pmb;
2057 HMAP_FOR_EACH (pmb, hmap_node, &put_mac_bindings) {
2058 run_put_mac_binding(ovnsb_idl_txn, sbrec_datapath_binding_by_key,
2059 sbrec_port_binding_by_key,
2060 sbrec_mac_binding_by_lport_ip,
2061 pmb);
2062 }
2063 flush_put_mac_bindings();
2064 }
2065
2066 static void
2067 wait_put_mac_bindings(struct ovsdb_idl_txn *ovnsb_idl_txn)
2068 {
2069 if (ovnsb_idl_txn && !hmap_is_empty(&put_mac_bindings)) {
2070 poll_immediate_wake();
2071 }
2072 }
2073
2074 static void
2075 flush_put_mac_bindings(void)
2076 {
2077 struct put_mac_binding *pmb;
2078 HMAP_FOR_EACH_POP (pmb, hmap_node, &put_mac_bindings) {
2079 free(pmb);
2080 }
2081 }
2082 \f
2083 /*
2084 * Send gratuitous ARP for vif on localnet.
2085 *
2086 * When a new vif on localnet is added, gratuitous ARPs are sent announcing
2087 * the port's mac,ip mapping. On localnet, such announcements are needed for
2088 * switches and routers on the broadcast segment to update their port-mac
2089 * and ARP tables.
2090 */
2091 struct garp_data {
2092 struct eth_addr ea; /* Ethernet address of port. */
2093 ovs_be32 ipv4; /* Ipv4 address of port. */
2094 long long int announce_time; /* Next announcement in ms. */
2095 int backoff; /* Backoff for the next announcement. */
2096 uint32_t dp_key; /* Datapath used to output this GARP. */
2097 uint32_t port_key; /* Port to inject the GARP into. */
2098 };
2099
2100 /* Contains GARPs to be sent. */
2101 static struct shash send_garp_data;
2102
2103 /* Next GARP announcement in ms. */
2104 static long long int send_garp_time;
2105
2106 static void
2107 init_send_garps(void)
2108 {
2109 shash_init(&send_garp_data);
2110 send_garp_time = LLONG_MAX;
2111 }
2112
2113 static void
2114 destroy_send_garps(void)
2115 {
2116 shash_destroy_free_data(&send_garp_data);
2117 }
2118
2119 static void
2120 add_garp(const char *name, const struct eth_addr ea, ovs_be32 ip,
2121 uint32_t dp_key, uint32_t port_key)
2122 {
2123 struct garp_data *garp = xmalloc(sizeof *garp);
2124 garp->ea = ea;
2125 garp->ipv4 = ip;
2126 garp->announce_time = time_msec() + 1000;
2127 garp->backoff = 1;
2128 garp->dp_key = dp_key;
2129 garp->port_key = port_key;
2130 shash_add(&send_garp_data, name, garp);
2131 }
2132
2133 /* Add or update a vif for which GARPs need to be announced. */
2134 static void
2135 send_garp_update(const struct sbrec_port_binding *binding_rec,
2136 struct shash *nat_addresses)
2137 {
2138 volatile struct garp_data *garp = NULL;
2139 /* Update GARP for NAT IP if it exists. Consider port bindings with type
2140 * "l3gateway" for logical switch ports attached to gateway routers, and
2141 * port bindings with type "patch" for logical switch ports attached to
2142 * distributed gateway ports. */
2143 if (!strcmp(binding_rec->type, "l3gateway")
2144 || !strcmp(binding_rec->type, "patch")) {
2145 struct lport_addresses *laddrs = NULL;
2146 while ((laddrs = shash_find_and_delete(nat_addresses,
2147 binding_rec->logical_port))) {
2148 int i;
2149 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
2150 char *name = xasprintf("%s-%s", binding_rec->logical_port,
2151 laddrs->ipv4_addrs[i].addr_s);
2152 garp = shash_find_data(&send_garp_data, name);
2153 if (garp) {
2154 garp->dp_key = binding_rec->datapath->tunnel_key;
2155 garp->port_key = binding_rec->tunnel_key;
2156 } else {
2157 add_garp(name, laddrs->ea,
2158 laddrs->ipv4_addrs[i].addr,
2159 binding_rec->datapath->tunnel_key,
2160 binding_rec->tunnel_key);
2161 }
2162 free(name);
2163 }
2164 destroy_lport_addresses(laddrs);
2165 free(laddrs);
2166 }
2167 return;
2168 }
2169
2170 /* Update GARP for vif if it exists. */
2171 garp = shash_find_data(&send_garp_data, binding_rec->logical_port);
2172 if (garp) {
2173 garp->dp_key = binding_rec->datapath->tunnel_key;
2174 garp->port_key = binding_rec->tunnel_key;
2175 return;
2176 }
2177
2178 /* Add GARP for new vif. */
2179 int i;
2180 for (i = 0; i < binding_rec->n_mac; i++) {
2181 struct lport_addresses laddrs;
2182 if (!extract_lsp_addresses(binding_rec->mac[i], &laddrs)
2183 || !laddrs.n_ipv4_addrs) {
2184 continue;
2185 }
2186
2187 add_garp(binding_rec->logical_port,
2188 laddrs.ea, laddrs.ipv4_addrs[0].addr,
2189 binding_rec->datapath->tunnel_key, binding_rec->tunnel_key);
2190
2191 destroy_lport_addresses(&laddrs);
2192 break;
2193 }
2194 }
2195
2196 /* Remove a vif from GARP announcements. */
2197 static void
2198 send_garp_delete(const char *lport)
2199 {
2200 struct garp_data *garp = shash_find_and_delete(&send_garp_data, lport);
2201 free(garp);
2202 }
2203
2204 static long long int
2205 send_garp(struct garp_data *garp, long long int current_time)
2206 {
2207 if (current_time < garp->announce_time) {
2208 return garp->announce_time;
2209 }
2210
2211 /* Compose a GARP request packet. */
2212 uint64_t packet_stub[128 / 8];
2213 struct dp_packet packet;
2214 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2215 compose_arp(&packet, ARP_OP_REQUEST, garp->ea, eth_addr_zero,
2216 true, garp->ipv4, garp->ipv4);
2217
2218 /* Inject GARP request. */
2219 uint64_t ofpacts_stub[4096 / 8];
2220 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
2221 enum ofp_version version = rconn_get_version(swconn);
2222 put_load(garp->dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
2223 put_load(garp->port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
2224 struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
2225 resubmit->in_port = OFPP_CONTROLLER;
2226 resubmit->table_id = OFTABLE_LOG_INGRESS_PIPELINE;
2227
2228 struct ofputil_packet_out po = {
2229 .packet = dp_packet_data(&packet),
2230 .packet_len = dp_packet_size(&packet),
2231 .buffer_id = UINT32_MAX,
2232 .ofpacts = ofpacts.data,
2233 .ofpacts_len = ofpacts.size,
2234 };
2235 match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
2236 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
2237 queue_msg(ofputil_encode_packet_out(&po, proto));
2238 dp_packet_uninit(&packet);
2239 ofpbuf_uninit(&ofpacts);
2240
2241 /* Set the next announcement. At most 5 announcements are sent for a
2242 * vif. */
2243 if (garp->backoff < 16) {
2244 garp->backoff *= 2;
2245 garp->announce_time = current_time + garp->backoff * 1000;
2246 } else {
2247 garp->announce_time = LLONG_MAX;
2248 }
2249 return garp->announce_time;
2250 }
2251
2252 /* Get localnet vifs, local l3gw ports and ofport for localnet patch ports. */
2253 static void
2254 get_localnet_vifs_l3gwports(
2255 struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
2256 struct ovsdb_idl_index *sbrec_port_binding_by_name,
2257 const struct ovsrec_bridge *br_int,
2258 const struct sbrec_chassis *chassis,
2259 const struct hmap *local_datapaths,
2260 struct sset *localnet_vifs,
2261 struct sset *local_l3gw_ports)
2262 {
2263 for (int i = 0; i < br_int->n_ports; i++) {
2264 const struct ovsrec_port *port_rec = br_int->ports[i];
2265 if (!strcmp(port_rec->name, br_int->name)) {
2266 continue;
2267 }
2268 const char *tunnel_id = smap_get(&port_rec->external_ids,
2269 "ovn-chassis-id");
2270 if (tunnel_id && strstr(tunnel_id, chassis->name)) {
2271 continue;
2272 }
2273 const char *localnet = smap_get(&port_rec->external_ids,
2274 "ovn-localnet-port");
2275 if (localnet) {
2276 continue;
2277 }
2278 for (int j = 0; j < port_rec->n_interfaces; j++) {
2279 const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
2280 if (!iface_rec->n_ofport) {
2281 continue;
2282 }
2283 /* Get localnet vif. */
2284 const char *iface_id = smap_get(&iface_rec->external_ids,
2285 "iface-id");
2286 if (!iface_id) {
2287 continue;
2288 }
2289 const struct sbrec_port_binding *pb
2290 = lport_lookup_by_name(sbrec_port_binding_by_name, iface_id);
2291 if (!pb) {
2292 continue;
2293 }
2294 struct local_datapath *ld
2295 = get_local_datapath(local_datapaths,
2296 pb->datapath->tunnel_key);
2297 if (ld && ld->localnet_port) {
2298 sset_add(localnet_vifs, iface_id);
2299 }
2300 }
2301 }
2302
2303 struct sbrec_port_binding *target = sbrec_port_binding_index_init_row(
2304 sbrec_port_binding_by_datapath);
2305
2306 const struct local_datapath *ld;
2307 HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
2308 const struct sbrec_port_binding *pb;
2309
2310 if (!ld->localnet_port) {
2311 continue;
2312 }
2313
2314 /* Get l3gw ports. Consider port bindings with type "l3gateway"
2315 * that connect to gateway routers (if local), and consider port
2316 * bindings of type "patch" since they might connect to
2317 * distributed gateway ports with NAT addresses. */
2318
2319 sbrec_port_binding_index_set_datapath(target, ld->datapath);
2320 SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, target,
2321 sbrec_port_binding_by_datapath) {
2322 if ((ld->has_local_l3gateway && !strcmp(pb->type, "l3gateway"))
2323 || !strcmp(pb->type, "patch")) {
2324 sset_add(local_l3gw_ports, pb->logical_port);
2325 }
2326 }
2327 }
2328 sbrec_port_binding_index_destroy_row(target);
2329 }
2330
2331 static bool
2332 pinctrl_is_chassis_resident(struct ovsdb_idl_index *sbrec_chassis_by_name,
2333 struct ovsdb_idl_index *sbrec_port_binding_by_name,
2334 const struct sbrec_chassis *chassis,
2335 const struct sset *active_tunnels,
2336 const char *port_name)
2337 {
2338 const struct sbrec_port_binding *pb
2339 = lport_lookup_by_name(sbrec_port_binding_by_name, port_name);
2340 if (!pb || !pb->chassis) {
2341 return false;
2342 }
2343 if (strcmp(pb->type, "chassisredirect")) {
2344 return pb->chassis == chassis;
2345 } else {
2346 struct ovs_list *gateway_chassis =
2347 gateway_chassis_get_ordered(sbrec_chassis_by_name, pb);
2348 bool active = gateway_chassis_is_active(gateway_chassis,
2349 chassis,
2350 active_tunnels);
2351 gateway_chassis_destroy(gateway_chassis);
2352 return active;
2353 }
2354 }
2355
2356 /* Extracts the mac, IPv4 and IPv6 addresses, and logical port from
2357 * 'addresses' which should be of the format 'MAC [IP1 IP2 ..]
2358 * [is_chassis_resident("LPORT_NAME")]', where IPn should be a valid IPv4
2359 * or IPv6 address, and stores them in the 'ipv4_addrs' and 'ipv6_addrs'
2360 * fields of 'laddrs'. The logical port name is stored in 'lport'.
2361 *
2362 * Returns true if at least 'MAC' is found in 'address', false otherwise.
2363 *
2364 * The caller must call destroy_lport_addresses() and free(*lport). */
2365 static bool
2366 extract_addresses_with_port(const char *addresses,
2367 struct lport_addresses *laddrs,
2368 char **lport)
2369 {
2370 int ofs;
2371 if (!extract_addresses(addresses, laddrs, &ofs)) {
2372 return false;
2373 } else if (ofs >= strlen(addresses)) {
2374 return true;
2375 }
2376
2377 struct lexer lexer;
2378 lexer_init(&lexer, addresses + ofs);
2379 lexer_get(&lexer);
2380
2381 if (lexer.error || lexer.token.type != LEX_T_ID
2382 || !lexer_match_id(&lexer, "is_chassis_resident")) {
2383 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2384 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", addresses);
2385 lexer_destroy(&lexer);
2386 return true;
2387 }
2388
2389 if (!lexer_match(&lexer, LEX_T_LPAREN)) {
2390 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2391 VLOG_INFO_RL(&rl, "Syntax error: expecting '(' after "
2392 "'is_chassis_resident' in address '%s'", addresses);
2393 lexer_destroy(&lexer);
2394 return false;
2395 }
2396
2397 if (lexer.token.type != LEX_T_STRING) {
2398 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2399 VLOG_INFO_RL(&rl,
2400 "Syntax error: expecting quoted string after "
2401 "'is_chassis_resident' in address '%s'", addresses);
2402 lexer_destroy(&lexer);
2403 return false;
2404 }
2405
2406 *lport = xstrdup(lexer.token.s);
2407
2408 lexer_get(&lexer);
2409 if (!lexer_match(&lexer, LEX_T_RPAREN)) {
2410 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2411 VLOG_INFO_RL(&rl, "Syntax error: expecting ')' after quoted string in "
2412 "'is_chassis_resident()' in address '%s'",
2413 addresses);
2414 lexer_destroy(&lexer);
2415 return false;
2416 }
2417
2418 lexer_destroy(&lexer);
2419 return true;
2420 }
2421
2422 static void
2423 consider_nat_address(struct ovsdb_idl_index *sbrec_chassis_by_name,
2424 struct ovsdb_idl_index *sbrec_port_binding_by_name,
2425 const char *nat_address,
2426 const struct sbrec_port_binding *pb,
2427 struct sset *nat_address_keys,
2428 const struct sbrec_chassis *chassis,
2429 const struct sset *active_tunnels,
2430 struct shash *nat_addresses)
2431 {
2432 struct lport_addresses *laddrs = xmalloc(sizeof *laddrs);
2433 char *lport = NULL;
2434 if (!extract_addresses_with_port(nat_address, laddrs, &lport)
2435 || (!lport && !strcmp(pb->type, "patch"))
2436 || (lport && !pinctrl_is_chassis_resident(
2437 sbrec_chassis_by_name, sbrec_port_binding_by_name, chassis,
2438 active_tunnels, lport))) {
2439 destroy_lport_addresses(laddrs);
2440 free(laddrs);
2441 free(lport);
2442 return;
2443 }
2444 free(lport);
2445
2446 int i;
2447 for (i = 0; i < laddrs->n_ipv4_addrs; i++) {
2448 char *name = xasprintf("%s-%s", pb->logical_port,
2449 laddrs->ipv4_addrs[i].addr_s);
2450 sset_add(nat_address_keys, name);
2451 free(name);
2452 }
2453 shash_add(nat_addresses, pb->logical_port, laddrs);
2454 }
2455
2456 static void
2457 get_nat_addresses_and_keys(struct ovsdb_idl_index *sbrec_chassis_by_name,
2458 struct ovsdb_idl_index *sbrec_port_binding_by_name,
2459 struct sset *nat_address_keys,
2460 struct sset *local_l3gw_ports,
2461 const struct sbrec_chassis *chassis,
2462 const struct sset *active_tunnels,
2463 struct shash *nat_addresses)
2464 {
2465 const char *gw_port;
2466 SSET_FOR_EACH(gw_port, local_l3gw_ports) {
2467 const struct sbrec_port_binding *pb;
2468
2469 pb = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
2470 if (!pb) {
2471 continue;
2472 }
2473
2474 if (pb->n_nat_addresses) {
2475 for (int i = 0; i < pb->n_nat_addresses; i++) {
2476 consider_nat_address(sbrec_chassis_by_name,
2477 sbrec_port_binding_by_name,
2478 pb->nat_addresses[i], pb,
2479 nat_address_keys, chassis,
2480 active_tunnels,
2481 nat_addresses);
2482 }
2483 } else {
2484 /* Continue to support options:nat-addresses for version
2485 * upgrade. */
2486 const char *nat_addresses_options = smap_get(&pb->options,
2487 "nat-addresses");
2488 if (nat_addresses_options) {
2489 consider_nat_address(sbrec_chassis_by_name,
2490 sbrec_port_binding_by_name,
2491 nat_addresses_options, pb,
2492 nat_address_keys, chassis,
2493 active_tunnels,
2494 nat_addresses);
2495 }
2496 }
2497 }
2498 }
2499
2500 static void
2501 send_garp_wait(void)
2502 {
2503 poll_timer_wait_until(send_garp_time);
2504 }
2505
2506 static void
2507 send_garp_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
2508 struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
2509 struct ovsdb_idl_index *sbrec_port_binding_by_name,
2510 const struct ovsrec_bridge *br_int,
2511 const struct sbrec_chassis *chassis,
2512 const struct hmap *local_datapaths,
2513 const struct sset *active_tunnels)
2514 {
2515 struct sset localnet_vifs = SSET_INITIALIZER(&localnet_vifs);
2516 struct sset local_l3gw_ports = SSET_INITIALIZER(&local_l3gw_ports);
2517 struct sset nat_ip_keys = SSET_INITIALIZER(&nat_ip_keys);
2518 struct shash nat_addresses;
2519
2520 shash_init(&nat_addresses);
2521
2522 get_localnet_vifs_l3gwports(sbrec_port_binding_by_datapath,
2523 sbrec_port_binding_by_name,
2524 br_int, chassis, local_datapaths,
2525 &localnet_vifs, &local_l3gw_ports);
2526
2527 get_nat_addresses_and_keys(sbrec_chassis_by_name,
2528 sbrec_port_binding_by_name,
2529 &nat_ip_keys, &local_l3gw_ports,
2530 chassis, active_tunnels,
2531 &nat_addresses);
2532 /* For deleted ports and deleted nat ips, remove from send_garp_data. */
2533 struct shash_node *iter, *next;
2534 SHASH_FOR_EACH_SAFE (iter, next, &send_garp_data) {
2535 if (!sset_contains(&localnet_vifs, iter->name) &&
2536 !sset_contains(&nat_ip_keys, iter->name)) {
2537 send_garp_delete(iter->name);
2538 }
2539 }
2540
2541 /* Update send_garp_data. */
2542 const char *iface_id;
2543 SSET_FOR_EACH (iface_id, &localnet_vifs) {
2544 const struct sbrec_port_binding *pb = lport_lookup_by_name(
2545 sbrec_port_binding_by_name, iface_id);
2546 if (pb) {
2547 send_garp_update(pb, &nat_addresses);
2548 }
2549 }
2550
2551 /* Update send_garp_data for nat-addresses. */
2552 const char *gw_port;
2553 SSET_FOR_EACH (gw_port, &local_l3gw_ports) {
2554 const struct sbrec_port_binding *pb
2555 = lport_lookup_by_name(sbrec_port_binding_by_name, gw_port);
2556 if (pb) {
2557 send_garp_update(pb, &nat_addresses);
2558 }
2559 }
2560
2561 /* Send GARPs, and update the next announcement. */
2562 long long int current_time = time_msec();
2563 send_garp_time = LLONG_MAX;
2564 SHASH_FOR_EACH (iter, &send_garp_data) {
2565 long long int next_announce = send_garp(iter->data, current_time);
2566 if (send_garp_time > next_announce) {
2567 send_garp_time = next_announce;
2568 }
2569 }
2570 sset_destroy(&localnet_vifs);
2571 sset_destroy(&local_l3gw_ports);
2572
2573 SHASH_FOR_EACH_SAFE (iter, next, &nat_addresses) {
2574 struct lport_addresses *laddrs = iter->data;
2575 destroy_lport_addresses(laddrs);
2576 shash_delete(&nat_addresses, iter);
2577 free(laddrs);
2578 }
2579 shash_destroy(&nat_addresses);
2580
2581 sset_destroy(&nat_ip_keys);
2582 }
2583
2584 static void
2585 reload_metadata(struct ofpbuf *ofpacts, const struct match *md)
2586 {
2587 enum mf_field_id md_fields[] = {
2588 #if FLOW_N_REGS == 16
2589 MFF_REG0,
2590 MFF_REG1,
2591 MFF_REG2,
2592 MFF_REG3,
2593 MFF_REG4,
2594 MFF_REG5,
2595 MFF_REG6,
2596 MFF_REG7,
2597 MFF_REG8,
2598 MFF_REG9,
2599 MFF_REG10,
2600 MFF_REG11,
2601 MFF_REG12,
2602 MFF_REG13,
2603 MFF_REG14,
2604 MFF_REG15,
2605 #else
2606 #error
2607 #endif
2608 MFF_METADATA,
2609 };
2610 for (size_t i = 0; i < ARRAY_SIZE(md_fields); i++) {
2611 const struct mf_field *field = mf_from_id(md_fields[i]);
2612 if (!mf_is_all_wild(field, &md->wc)) {
2613 union mf_value value;
2614 mf_get_value(field, &md->flow, &value);
2615 ofpact_put_set_field(ofpacts, field, &value, NULL);
2616 }
2617 }
2618 }
2619
2620 static void
2621 pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md,
2622 struct ofpbuf *userdata, bool is_router)
2623 {
2624 /* This action only works for IPv6 ND packets, and the switch should only
2625 * send us ND packets this way, but check here just to be sure. */
2626 if (!is_nd(ip_flow, NULL)) {
2627 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2628 VLOG_WARN_RL(&rl, "NA action on non-ND packet");
2629 return;
2630 }
2631
2632 uint64_t packet_stub[128 / 8];
2633 struct dp_packet packet;
2634 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2635
2636 /* These flags are not exactly correct. Look at section 7.2.4
2637 * of RFC 4861. */
2638 uint32_t rso_flags = ND_RSO_SOLICITED | ND_RSO_OVERRIDE;
2639 if (is_router) {
2640 rso_flags |= ND_RSO_ROUTER;
2641 }
2642 compose_nd_na(&packet, ip_flow->dl_dst, ip_flow->dl_src,
2643 &ip_flow->nd_target, &ip_flow->ipv6_src,
2644 htonl(rso_flags));
2645
2646 /* Reload previous packet metadata and set actions from userdata. */
2647 set_actions_and_enqueue_msg(&packet, md, userdata);
2648 dp_packet_uninit(&packet);
2649 }
2650
2651 static void
2652 pinctrl_handle_nd_ns(const struct flow *ip_flow, struct dp_packet *pkt_in,
2653 const struct match *md, struct ofpbuf *userdata)
2654 {
2655 /* This action only works for IPv6 packets. */
2656 if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) {
2657 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2658 VLOG_WARN_RL(&rl, "NS action on non-IPv6 packet");
2659 return;
2660 }
2661
2662 pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, false);
2663
2664 uint64_t packet_stub[128 / 8];
2665 struct dp_packet packet;
2666 dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
2667
2668 compose_nd_ns(&packet, ip_flow->dl_src, &ip_flow->ipv6_src,
2669 &ip_flow->ipv6_dst);
2670
2671 /* Reload previous packet metadata and set actions from userdata. */
2672 set_actions_and_enqueue_msg(&packet, md, userdata);
2673 dp_packet_uninit(&packet);
2674 }
2675
2676 static void
2677 pinctrl_handle_put_nd_ra_opts(
2678 const struct flow *in_flow, struct dp_packet *pkt_in,
2679 struct ofputil_packet_in *pin, struct ofpbuf *userdata,
2680 struct ofpbuf *continuation)
2681 {
2682 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2683 enum ofp_version version = rconn_get_version(swconn);
2684 enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
2685 struct dp_packet *pkt_out_ptr = NULL;
2686 uint32_t success = 0;
2687
2688 /* Parse result field. */
2689 const struct mf_field *f;
2690 enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL);
2691 if (ofperr) {
2692 VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr));
2693 goto exit;
2694 }
2695
2696 /* Parse result offset. */
2697 ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp);
2698 if (!ofsp) {
2699 VLOG_WARN_RL(&rl, "offset not present in the userdata");
2700 goto exit;
2701 }
2702
2703 /* Check that the result is valid and writable. */
2704 struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 };
2705 ofperr = mf_check_dst(&dst, NULL);
2706 if (ofperr) {
2707 VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr));
2708 goto exit;
2709 }
2710
2711 if (!userdata->size) {
2712 VLOG_WARN_RL(&rl, "IPv6 ND RA options not present in the userdata");
2713 goto exit;
2714 }
2715
2716 if (!is_icmpv6(in_flow, NULL) || in_flow->tp_dst != htons(0) ||
2717 in_flow->tp_src != htons(ND_ROUTER_SOLICIT)) {
2718 VLOG_WARN_RL(&rl, "put_nd_ra action on invalid or unsupported packet");
2719 goto exit;
2720 }
2721
2722 size_t new_packet_size = pkt_in->l4_ofs + userdata->size;
2723 struct dp_packet pkt_out;
2724 dp_packet_init(&pkt_out, new_packet_size);
2725 dp_packet_clear(&pkt_out);
2726 dp_packet_prealloc_tailroom(&pkt_out, new_packet_size);
2727 pkt_out_ptr = &pkt_out;
2728
2729 /* Copy L2 and L3 headers from pkt_in. */
2730 dp_packet_put(&pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs),
2731 pkt_in->l4_ofs);
2732
2733 pkt_out.l2_5_ofs = pkt_in->l2_5_ofs;
2734 pkt_out.l2_pad_size = pkt_in->l2_pad_size;
2735 pkt_out.l3_ofs = pkt_in->l3_ofs;
2736 pkt_out.l4_ofs = pkt_in->l4_ofs;
2737
2738 /* Copy the ICMPv6 Router Advertisement data from 'userdata' field. */
2739 dp_packet_put(&pkt_out, userdata->data, userdata->size);
2740
2741 /* Set the IPv6 payload length and calculate the ICMPv6 checksum. */
2742 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(&pkt_out);
2743 nh->ip6_plen = htons(userdata->size);
2744 struct ovs_ra_msg *ra = dp_packet_l4(&pkt_out);
2745 ra->icmph.icmp6_cksum = 0;
2746 uint32_t icmp_csum = packet_csum_pseudoheader6(nh);
2747 ra->icmph.icmp6_cksum = csum_finish(csum_continue(
2748 icmp_csum, ra, userdata->size));
2749 pin->packet = dp_packet_data(&pkt_out);
2750 pin->packet_len = dp_packet_size(&pkt_out);
2751 success = 1;
2752
2753 exit:
2754 if (!ofperr) {
2755 union mf_subvalue sv;
2756 sv.u8_val = success;
2757 mf_write_subfield(&dst, &sv, &pin->flow_metadata);
2758 }
2759 queue_msg(ofputil_encode_resume(pin, continuation, proto));
2760 dp_packet_uninit(pkt_out_ptr);
2761 }