/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)

#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/net.h>
#include <net/net_namespace.h>

#include <net/icmp.h>
#include <net/inet_frag.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>

#include "datapath.h"
#include "tunnel.h"
#include "vport.h"

#define CAPWAP_SRC_PORT 58881
#define CAPWAP_DST_PORT 58882
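/*
 * Note: these are not the IANA-assigned CAPWAP ports (5246 control /
 * 5247 data); they are private values used only between OVS endpoints
 * speaking this encapsulation.
 */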

#define CAPWAP_FRAG_TIMEOUT (30 * HZ)
#define CAPWAP_FRAG_MAX_MEM (256 * 1024)
#define CAPWAP_FRAG_PRUNE_MEM (192 * 1024)
#define CAPWAP_FRAG_SECRET_INTERVAL (10 * 60 * HZ)

/*
 * The CAPWAP header is a mess, with all kinds of odd-sized bit fields that
 * cross byte boundaries and are difficult to represent correctly in the
 * various byte orderings. Luckily we only care about a few permutations, so
 * we create them statically and can do very fast parsing by checking all 12
 * fields in one go.
 */
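/*
 * For reference, the layout of the first 32-bit word as implied by the
 * masks below (bit 31 = MSB of the first byte on the wire):
 *
 *  31      24 23   19 18   14 13    9 8               0
 * +----------+-------+-------+-------+-----------------+
 * | preamble | HLEN  |  RID  | WBID  | flags (9 bits)  |
 * +----------+-------+-------+-------+-----------------+
 *
 * Within the flags field, bit 7 is F (fragment), bit 6 is L (last
 * fragment), bit 5 is W (WSI present) and bit 4 is M (radio MAC present).
 */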
#define CAPWAP_PREAMBLE_MASK __cpu_to_be32(0xFF000000)
#define CAPWAP_HLEN_SHIFT 17
#define CAPWAP_HLEN_MASK __cpu_to_be32(0x00F80000)
#define CAPWAP_RID_MASK __cpu_to_be32(0x0007C000)
#define CAPWAP_WBID_MASK __cpu_to_be32(0x00003E00)
#define CAPWAP_F_MASK __cpu_to_be32(0x000001FF)

#define CAPWAP_F_FRAG __cpu_to_be32(0x00000080)
#define CAPWAP_F_LASTFRAG __cpu_to_be32(0x00000040)
#define CAPWAP_F_WSI __cpu_to_be32(0x00000020)
#define CAPWAP_F_RMAC __cpu_to_be32(0x00000010)

#define CAPWAP_RMAC_LEN 4

/* Standard CAPWAP looks for a WBID value of 2.
 * When we insert the WSI field, we use a WBID value of 30, which has been
 * proposed for all "experimental" usage - users with no reserved WBID value
 * of their own.
 */
#define CAPWAP_WBID_30 __cpu_to_be32(0x00003C00)
#define CAPWAP_WBID_2 __cpu_to_be32(0x00000200)

#define FRAG_HDR (CAPWAP_F_FRAG)
#define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_F_LASTFRAG)

/* Keyed packet, WBID 30, and length long enough to include WSI key */
#define CAPWAP_KEYED (CAPWAP_WBID_30 | CAPWAP_F_WSI | htonl(20 << CAPWAP_HLEN_SHIFT))
/* A backward-compatible packet, WBID 2 and length of 2 words (no WSI fields) */
#define CAPWAP_NO_WSI (CAPWAP_WBID_2 | htonl(8 << CAPWAP_HLEN_SHIFT))
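/*
 * HLEN is a 5-bit field counting 4-byte words, stored at bits 23-19.
 * Shifting a byte count by CAPWAP_HLEN_SHIFT (17, i.e. 19 - 2) converts
 * between the two directly: 20 << 17 encodes an HLEN of 5 words (the
 * 20-byte keyed header above), 8 << 17 encodes 2 words, and on receive
 * ntohl(begin & CAPWAP_HLEN_MASK) >> 17 yields the header length in bytes.
 */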

/* Mask for all parts of header that must be 0. */
#define CAPWAP_ZERO_MASK (CAPWAP_PREAMBLE_MASK | \
                (CAPWAP_F_MASK ^ (CAPWAP_F_WSI | CAPWAP_F_FRAG | CAPWAP_F_LASTFRAG | CAPWAP_F_RMAC)))

struct capwaphdr {
        __be32 begin;
        __be16 frag_id;
        /* low 3 bits of frag_off are reserved */
        __be16 frag_off;
};

/*
 * We use the WSI field to hold additional tunnel data.
 * The first eight bits store the size of the WSI data in bytes.
 */
struct capwaphdr_wsi {
        u8 wsi_len;
        u8 flags;
        __be16 reserved_padding;
};

struct capwaphdr_wsi_key {
        __be64 key;
};

/* Flag indicating a 64-bit key is stored in the WSI data field */
#define CAPWAP_WSI_F_KEY64 0x80
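/*
 * On-wire layout of a keyed header, as built below: an 8-byte capwaphdr,
 * a 4-byte capwaphdr_wsi and an 8-byte capwaphdr_wsi_key, 20 bytes in all,
 * matching the length encoded in CAPWAP_KEYED.
 */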

static struct capwaphdr *capwap_hdr(const struct sk_buff *skb)
{
        return (struct capwaphdr *)(udp_hdr(skb) + 1);
}

/*
 * The fragment offset is actually the high 13 bits of the last 16 bit field,
 * so we would normally need to right shift 3 places. However, it stores the
 * offset in 8 byte chunks, which would involve a 3 place left shift. So we
 * just mask off the last 3 bits and be done with it.
 */
#define FRAG_OFF_MASK (~0x7U)
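/*
 * For example, a byte offset of 1448 is 181 8-byte chunks; shifting 181
 * into the high 13 bits gives 181 << 3 == 1448, so masking off the low
 * 3 bits of the field recovers the byte offset with no shifting at all.
 */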

/*
 * The minimum header length. The header may be longer if the optional
 * WSI field is used.
 */
#define CAPWAP_MIN_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr))

struct frag_match {
        __be32 saddr;
        __be32 daddr;
        __be16 id;
};

struct frag_queue {
        struct inet_frag_queue ifq;
        struct frag_match match;
};

struct frag_skb_cb {
        u16 offset;
};
#define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb)
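/*
 * struct frag_skb_cb lives in skb->cb (48 bytes on the kernels this file
 * supports), so the single u16 fits with plenty of room to spare.
 */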

static struct sk_buff *fragment(struct sk_buff *, const struct vport *,
                                struct dst_entry *dst, unsigned int hlen);
static struct sk_buff *defrag(struct sk_buff *, bool frag_last);

static void capwap_frag_init(struct inet_frag_queue *, void *match);
static unsigned int capwap_frag_hash(struct inet_frag_queue *);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
static int capwap_frag_match(struct inet_frag_queue *, void *match);
#else
static bool capwap_frag_match(struct inet_frag_queue *, void *match);
#endif
static void capwap_frag_expire(unsigned long ifq);

static struct inet_frags frag_state = {
        .constructor = capwap_frag_init,
        .qsize = sizeof(struct frag_queue),
        .hashfn = capwap_frag_hash,
        .match = capwap_frag_match,
        .frag_expire = capwap_frag_expire,
        .secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
};

static int capwap_hdr_len(const struct tnl_mutable_config *mutable,
                          const struct ovs_key_ipv4_tunnel *tun_key)
{
        int size = CAPWAP_MIN_HLEN;
        u32 flags;
        __be64 out_key;

        tnl_get_param(mutable, tun_key, &flags, &out_key);

        /* CAPWAP has no checksums. */
        if (flags & TNL_F_CSUM)
                return -EINVAL;

        /* if keys are specified, then add WSI field */
        if (out_key || (flags & TNL_F_OUT_KEY_ACTION)) {
                size += sizeof(struct capwaphdr_wsi) +
                        sizeof(struct capwaphdr_wsi_key);
        }

        return size;
}

static struct sk_buff *capwap_build_header(const struct vport *vport,
                                           const struct tnl_mutable_config *mutable,
                                           struct dst_entry *dst,
                                           struct sk_buff *skb,
                                           int tunnel_hlen)
{
        struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
        struct udphdr *udph = udp_hdr(skb);
        struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
        u32 flags;
        __be64 out_key;

        tnl_get_param(mutable, tun_key, &flags, &out_key);

        udph->source = htons(CAPWAP_SRC_PORT);
        udph->dest = htons(CAPWAP_DST_PORT);
        udph->check = 0;

        cwh->frag_id = 0;
        cwh->frag_off = 0;

        if (out_key || flags & TNL_F_OUT_KEY_ACTION) {
                /* first field in WSI is key */
                struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);

                cwh->begin = CAPWAP_KEYED;

                /* -1 for wsi_len byte, not included in length as per spec */
                wsi->wsi_len = sizeof(struct capwaphdr_wsi) - 1
                             + sizeof(struct capwaphdr_wsi_key);
                wsi->flags = CAPWAP_WSI_F_KEY64;
                wsi->reserved_padding = 0;

                if (out_key) {
                        struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
                        opt->key = out_key;
                }
        } else {
                /* make packet readable by old capwap code */
                cwh->begin = CAPWAP_NO_WSI;
        }
        udph->len = htons(skb->len - skb_transport_offset(skb));

        if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
                unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable, tun_key);
                skb = fragment(skb, vport, dst, hlen);
        }

        return skb;
}

static int process_capwap_wsi(struct sk_buff *skb, __be64 *key, bool *key_present)
{
        struct capwaphdr *cwh = capwap_hdr(skb);
        struct capwaphdr_wsi *wsi;
        int hdr_len;
        int rmac_len = 0;
        int wsi_len;

        if ((cwh->begin & CAPWAP_WBID_MASK) != CAPWAP_WBID_30)
                return 0;

        if (cwh->begin & CAPWAP_F_RMAC)
                rmac_len = CAPWAP_RMAC_LEN;

        hdr_len = ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT;

        if (unlikely(sizeof(struct capwaphdr) + rmac_len + sizeof(struct capwaphdr_wsi) > hdr_len))
                return -EINVAL;

        /* read wsi header to find out how big it really is */
        wsi = (struct capwaphdr_wsi *)((u8 *)(cwh + 1) + rmac_len);
        /* +1 for length byte not included in wsi_len */
        wsi_len = 1 + wsi->wsi_len;

        if (unlikely(sizeof(struct capwaphdr) + rmac_len + wsi_len != hdr_len))
                return -EINVAL;

        wsi_len -= sizeof(struct capwaphdr_wsi);

        if (wsi->flags & CAPWAP_WSI_F_KEY64) {
                struct capwaphdr_wsi_key *opt;

                if (unlikely(wsi_len < sizeof(struct capwaphdr_wsi_key)))
                        return -EINVAL;

                opt = (struct capwaphdr_wsi_key *)(wsi + 1);
                *key = opt->key;
                *key_present = true;
        } else {
                *key_present = false;
        }

        return 0;
}

static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key, bool *key_present)
{
        struct capwaphdr *cwh = capwap_hdr(skb);
        int hdr_len = sizeof(struct udphdr);

        if (unlikely((cwh->begin & CAPWAP_ZERO_MASK) != 0))
                goto error;

        hdr_len += ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT;
        if (unlikely(hdr_len < CAPWAP_MIN_HLEN))
                goto error;

        if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
                goto error;

        cwh = capwap_hdr(skb);
        __skb_pull(skb, hdr_len);
        skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);

        if (cwh->begin & CAPWAP_F_FRAG) {
                skb = defrag(skb, (__force bool)(cwh->begin & CAPWAP_F_LASTFRAG));
                if (!skb)
                        return NULL;
                cwh = capwap_hdr(skb);
        }

        if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key, key_present))
                goto error;

        return skb;
error:
        kfree_skb(skb);
        return NULL;
}

/* Called with rcu_read_lock and BH disabled. */
static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct vport *vport;
        const struct tnl_mutable_config *mutable;
        struct iphdr *iph;
        struct ovs_key_ipv4_tunnel tun_key;
        __be64 key = 0;
        bool key_present = false;

        if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
                goto error;

        skb = process_capwap_proto(skb, &key, &key_present);
        if (unlikely(!skb))
                goto out;

        iph = ip_hdr(skb);
        vport = ovs_tnl_find_port(sock_net(sk), iph->daddr, iph->saddr, key,
                                  TNL_T_PROTO_CAPWAP, &mutable);
        if (unlikely(!vport)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
                goto error;
        }

        if (key_present && mutable->key.daddr &&
            !(mutable->flags & TNL_F_IN_KEY_MATCH)) {
                key_present = false;
                key = 0;
        }

        tnl_tun_key_init(&tun_key, iph, key, key_present ? OVS_TNL_F_KEY : 0);
        OVS_CB(skb)->tun_key = &tun_key;

        ovs_tnl_rcv(vport, skb);
        goto out;

error:
        kfree_skb(skb);
out:
        return 0;
}

static const struct tnl_ops capwap_tnl_ops = {
        .tunnel_type = TNL_T_PROTO_CAPWAP,
        .ipproto = IPPROTO_UDP,
        .hdr_len = capwap_hdr_len,
        .build_header = capwap_build_header,
};

static inline struct capwap_net *ovs_get_capwap_net(struct net *net)
{
        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
        return &ovs_net->vport_net.capwap;
}

/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */
#define UDP_ENCAP_CAPWAP 10
static int init_socket(struct net *net)
{
        int err;
        struct capwap_net *capwap_net = ovs_get_capwap_net(net);
        struct sockaddr_in sin;

        if (capwap_net->n_tunnels) {
                capwap_net->n_tunnels++;
                return 0;
        }

        err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
                               &capwap_net->capwap_rcv_socket);
        if (err)
                goto error;

        /* release net ref. */
        sk_change_net(capwap_net->capwap_rcv_socket->sk, net);

        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = htonl(INADDR_ANY);
        sin.sin_port = htons(CAPWAP_DST_PORT);

        err = kernel_bind(capwap_net->capwap_rcv_socket,
                          (struct sockaddr *)&sin,
                          sizeof(struct sockaddr_in));
        if (err)
                goto error_sock;

        udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
        udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_rcv = capwap_rcv;

        capwap_net->frag_state.timeout = CAPWAP_FRAG_TIMEOUT;
        capwap_net->frag_state.high_thresh = CAPWAP_FRAG_MAX_MEM;
        capwap_net->frag_state.low_thresh = CAPWAP_FRAG_PRUNE_MEM;

        inet_frags_init_net(&capwap_net->frag_state);
        udp_encap_enable();
        capwap_net->n_tunnels++;
        return 0;

error_sock:
        sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
error:
        pr_warn("cannot register capwap protocol handler: %d\n", err);
        return err;
}

static void release_socket(struct net *net)
{
        struct capwap_net *capwap_net = ovs_get_capwap_net(net);

        capwap_net->n_tunnels--;
        if (capwap_net->n_tunnels)
                return;

        inet_frags_exit_net(&capwap_net->frag_state, &frag_state);
        sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
}

static struct vport *capwap_create(const struct vport_parms *parms)
{
        struct vport *vport;
        int err;

        err = init_socket(ovs_dp_get_net(parms->dp));
        if (err)
                return ERR_PTR(err);

        vport = ovs_tnl_create(parms, &ovs_capwap_vport_ops, &capwap_tnl_ops);
        if (IS_ERR(vport))
                release_socket(ovs_dp_get_net(parms->dp));

        return vport;
}

static void capwap_destroy(struct vport *vport)
{
        ovs_tnl_destroy(vport);
        release_socket(ovs_dp_get_net(vport->dp));
}

static int capwap_init(void)
{
        inet_frags_init(&frag_state);
        return 0;
}

static void capwap_exit(void)
{
        inet_frags_fini(&frag_state);
}

static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        if (from->sk)
                skb_set_owner_w(to, from->sk);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
        to->ipvs_property = from->ipvs_property;
#endif
        skb_copy_secmark(to, from);
}

static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
                                struct dst_entry *dst, unsigned int hlen)
{
        struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
        unsigned int headroom;
        unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb);
        struct sk_buff *result = NULL, *list_cur = NULL;
        unsigned int remaining;
        unsigned int offset;
        __be16 frag_id;

        if (hlen + ~FRAG_OFF_MASK + 1 > max_frame_len) {
                if (net_ratelimit())
                        pr_warn("capwap link mtu (%d) is less than minimum packet (%d)\n",
                                dst_mtu(dst),
                                hlen - skb_network_offset(skb) + ~FRAG_OFF_MASK + 1);
                goto error;
        }

        remaining = skb->len - hlen;
        offset = 0;
        frag_id = htons(atomic_inc_return(&tnl_vport->frag_id));

        headroom = dst->header_len + 16;
        if (!skb_network_offset(skb))
                headroom += LL_RESERVED_SPACE(dst->dev);

        while (remaining) {
                struct sk_buff *skb2;
                int frag_size;
                struct udphdr *udph;
                struct capwaphdr *cwh;

                frag_size = min(remaining, max_frame_len - hlen);
                if (remaining > frag_size)
                        frag_size &= FRAG_OFF_MASK;
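
                /*
                 * Example (illustrative numbers): with max_frame_len - hlen
                 * == 1450 and 4000 bytes remaining, a non-final fragment is
                 * trimmed to 1448 (1450 & ~7) so the next fragment's offset
                 * stays a multiple of 8; the final fragment keeps its exact
                 * size since no offset is derived from it.
                 */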

                skb2 = alloc_skb(headroom + hlen + frag_size, GFP_ATOMIC);
                if (!skb2)
                        goto error;

                skb_reserve(skb2, headroom);
                __skb_put(skb2, hlen + frag_size);

                if (skb_network_offset(skb))
                        skb_reset_mac_header(skb2);
                skb_set_network_header(skb2, skb_network_offset(skb));
                skb_set_transport_header(skb2, skb_transport_offset(skb));

                /* Copy (Ethernet)/IP/UDP/CAPWAP header. */
                copy_skb_metadata(skb, skb2);
                skb_copy_from_linear_data(skb, skb2->data, hlen);

                /* Copy this data chunk. */
                if (skb_copy_bits(skb, hlen + offset, skb2->data + hlen, frag_size))
                        BUG();

                udph = udp_hdr(skb2);
                udph->len = htons(skb2->len - skb_transport_offset(skb2));

                cwh = capwap_hdr(skb2);
                if (remaining > frag_size)
                        cwh->begin |= FRAG_HDR;
                else
                        cwh->begin |= FRAG_LAST_HDR;
                cwh->frag_id = frag_id;
                cwh->frag_off = htons(offset);

                if (result) {
                        list_cur->next = skb2;
                        list_cur = skb2;
                } else
                        result = list_cur = skb2;

                offset += frag_size;
                remaining -= frag_size;
        }

        consume_skb(skb);
        return result;

error:
        ovs_tnl_free_linked_skbs(result);
        kfree_skb(skb);
        return NULL;
}

/* All of the following functions relate to fragmentation reassembly. */

static struct frag_queue *ifq_cast(struct inet_frag_queue *ifq)
{
        return container_of(ifq, struct frag_queue, ifq);
}

static u32 frag_hash(struct frag_match *match)
{
        return jhash_3words((__force u16)match->id, (__force u32)match->saddr,
                            (__force u32)match->daddr,
                            frag_state.rnd) & (INETFRAGS_HASHSZ - 1);
}

static struct frag_queue *queue_find(struct netns_frags *ns_frag_state,
                                     struct frag_match *match)
{
        struct inet_frag_queue *ifq;

        read_lock(&frag_state.lock);

        ifq = inet_frag_find(ns_frag_state, &frag_state, match, frag_hash(match));
        if (!ifq)
                return NULL;

        /* Unlock happens inside inet_frag_find(). */

        return ifq_cast(ifq);
}

static struct sk_buff *frag_reasm(struct frag_queue *fq, struct net_device *dev)
{
        struct sk_buff *head = fq->ifq.fragments;
        struct sk_buff *frag;

        /* Succeed or fail, we're done with this queue. */
        inet_frag_kill(&fq->ifq, &frag_state);

        if (fq->ifq.len > 65535)
                return NULL;

        /* Can't have the head be a clone. */
        if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
                return NULL;

        /*
         * We're about to build a frag list for this SKB. If it already has a
         * frag list, alloc a new SKB and put the existing frag list there.
         */
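        /*
         * Roughly (a sketch of the pointer surgery below):
         *
         *   before: head [frag_list: a->b] -> next1 -> next2
         *   after:  head [frag_list: empty] -> frag [frag_list: a->b] -> next1 -> next2
         *
         * The old frag list hangs off a fresh skb spliced in right after
         * head, and head's byte counts are adjusted to cover only its own
         * linear and paged data.
         */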
        if (skb_shinfo(head)->frag_list) {
                int i;
                int paged_len = 0;

                frag = alloc_skb(0, GFP_ATOMIC);
                if (!frag)
                        return NULL;

                frag->next = head->next;
                head->next = frag;
                skb_shinfo(frag)->frag_list = skb_shinfo(head)->frag_list;
                skb_shinfo(head)->frag_list = NULL;

                for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
                        paged_len += skb_shinfo(head)->frags[i].size;
                frag->len = frag->data_len = head->data_len - paged_len;
                head->data_len -= frag->len;
                head->len -= frag->len;

                frag->ip_summed = head->ip_summed;
                atomic_add(frag->truesize, &fq->ifq.net->mem);
        }

        skb_shinfo(head)->frag_list = head->next;
        atomic_sub(head->truesize, &fq->ifq.net->mem);

        /* Properly account for data in various packets. */
        for (frag = head->next; frag; frag = frag->next) {
                head->data_len += frag->len;
                head->len += frag->len;

                if (head->ip_summed != frag->ip_summed)
                        head->ip_summed = CHECKSUM_NONE;
                else if (head->ip_summed == CHECKSUM_COMPLETE)
                        head->csum = csum_add(head->csum, frag->csum);

                head->truesize += frag->truesize;
                atomic_sub(frag->truesize, &fq->ifq.net->mem);
        }

        head->next = NULL;
        head->dev = dev;
        head->tstamp = fq->ifq.stamp;
        fq->ifq.fragments = NULL;

        return head;
}

static struct sk_buff *frag_queue(struct frag_queue *fq, struct sk_buff *skb,
                                  u16 offset, bool frag_last)
{
        struct sk_buff *prev, *next;
        struct net_device *dev;
        int end;

        if (fq->ifq.last_in & INET_FRAG_COMPLETE)
                goto error;

        if (!skb->len)
                goto error;

        end = offset + skb->len;

        if (frag_last) {
                /*
                 * Last fragment, shouldn't already have data past our end or
                 * have another last fragment.
                 */
                if (end < fq->ifq.len || fq->ifq.last_in & INET_FRAG_LAST_IN)
                        goto error;

                fq->ifq.last_in |= INET_FRAG_LAST_IN;
                fq->ifq.len = end;
        } else {
                /* Fragments should align to 8 byte chunks. */
                if (end & ~FRAG_OFF_MASK)
                        goto error;

                if (end > fq->ifq.len) {
                        /*
                         * Shouldn't have data past the end if we've already
                         * seen the last fragment.
                         */
                        if (fq->ifq.last_in & INET_FRAG_LAST_IN)
                                goto error;

                        fq->ifq.len = end;
                }
        }

        /* Find where we fit in. */
        prev = NULL;
        for (next = fq->ifq.fragments; next != NULL; next = next->next) {
                if (FRAG_CB(next)->offset >= offset)
                        break;
                prev = next;
        }

        /*
         * Overlapping fragments aren't allowed. We shouldn't start before
         * the end of the previous fragment.
         */
        if (prev && FRAG_CB(prev)->offset + prev->len > offset)
                goto error;

        /* We also shouldn't end after the beginning of the next fragment. */
        if (next && end > FRAG_CB(next)->offset)
                goto error;

        FRAG_CB(skb)->offset = offset;

        /* Link into list. */
        skb->next = next;
        if (prev)
                prev->next = skb;
        else
                fq->ifq.fragments = skb;

        dev = skb->dev;
        skb->dev = NULL;

        fq->ifq.stamp = skb->tstamp;
        fq->ifq.meat += skb->len;
        atomic_add(skb->truesize, &fq->ifq.net->mem);
        if (offset == 0)
                fq->ifq.last_in |= INET_FRAG_FIRST_IN;

        /* If we have all fragments do reassembly. */
        if (fq->ifq.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
            fq->ifq.meat == fq->ifq.len)
                return frag_reasm(fq, dev);

        write_lock(&frag_state.lock);
        list_move_tail(&fq->ifq.lru_list, &fq->ifq.net->lru_list);
        write_unlock(&frag_state.lock);

        return NULL;

error:
        kfree_skb(skb);
        return NULL;
}

static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last)
{
        struct iphdr *iph = ip_hdr(skb);
        struct capwaphdr *cwh = capwap_hdr(skb);
        struct capwap_net *capwap_net = ovs_get_capwap_net(dev_net(skb->dev));
        struct netns_frags *ns_frag_state = &capwap_net->frag_state;
        struct frag_match match;
        u16 frag_off;
        struct frag_queue *fq;

        inet_frag_evictor(ns_frag_state, &frag_state, false);

        match.daddr = iph->daddr;
        match.saddr = iph->saddr;
        match.id = cwh->frag_id;
        frag_off = ntohs(cwh->frag_off) & FRAG_OFF_MASK;

        fq = queue_find(ns_frag_state, &match);
        if (fq) {
                spin_lock(&fq->ifq.lock);
                skb = frag_queue(fq, skb, frag_off, frag_last);
                spin_unlock(&fq->ifq.lock);

                inet_frag_put(&fq->ifq, &frag_state);

                return skb;
        }

        kfree_skb(skb);
        return NULL;
}

static void capwap_frag_init(struct inet_frag_queue *ifq, void *match_)
{
        struct frag_match *match = match_;

        ifq_cast(ifq)->match = *match;
}

static unsigned int capwap_frag_hash(struct inet_frag_queue *ifq)
{
        return frag_hash(&ifq_cast(ifq)->match);
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
static int capwap_frag_match(struct inet_frag_queue *ifq, void *a_)
#else
static bool capwap_frag_match(struct inet_frag_queue *ifq, void *a_)
#endif
{
        struct frag_match *a = a_;
        struct frag_match *b = &ifq_cast(ifq)->match;

        return a->id == b->id && a->saddr == b->saddr && a->daddr == b->daddr;
}

/* Run when the timeout for a given queue expires. */
static void capwap_frag_expire(unsigned long ifq)
{
        struct frag_queue *fq;

        fq = ifq_cast((struct inet_frag_queue *)ifq);

        spin_lock(&fq->ifq.lock);

        if (!(fq->ifq.last_in & INET_FRAG_COMPLETE))
                inet_frag_kill(&fq->ifq, &frag_state);

        spin_unlock(&fq->ifq.lock);
        inet_frag_put(&fq->ifq, &frag_state);
}

const struct vport_ops ovs_capwap_vport_ops = {
        .type = OVS_VPORT_TYPE_CAPWAP,
        .flags = VPORT_F_TUN_ID,
        .init = capwap_init,
        .exit = capwap_exit,
        .create = capwap_create,
        .destroy = capwap_destroy,
        .get_name = ovs_tnl_get_name,
        .get_options = ovs_tnl_get_options,
        .set_options = ovs_tnl_set_options,
        .send = ovs_tnl_send,
};
#else
#warning CAPWAP tunneling will not be available on kernels before 2.6.26
#endif /* Linux kernel < 2.6.26 */