]> git.proxmox.com Git - ovs.git/blame - datapath/vport-capwap.c
Global replace of Nicira Networks.
[ovs.git] / datapath / vport-capwap.c
CommitLineData
e90b1cf9 1/*
e0edde6f 2 * Copyright (c) 2007-2012 Nicira, Inc.
e90b1cf9
JG
3 * Distributed under the terms of the GNU GPL version 2.
4 *
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
7 */
8
dfffaef1
JP
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
e90b1cf9
JG
11#include <linux/version.h>
12#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
13
14#include <linux/if.h>
15#include <linux/in.h>
16#include <linux/ip.h>
17#include <linux/list.h>
18#include <linux/net.h>
2a4999f3 19#include <net/net_namespace.h>
e90b1cf9
JG
20
21#include <net/icmp.h>
22#include <net/inet_frag.h>
23#include <net/ip.h>
24#include <net/protocol.h>
cd8055cc 25#include <net/udp.h>
e90b1cf9 26
2a4999f3 27#include "datapath.h"
e90b1cf9
JG
28#include "tunnel.h"
29#include "vport.h"
30#include "vport-generic.h"
31
#define CAPWAP_SRC_PORT 58881
#define CAPWAP_DST_PORT 58882

/* Reassembly limits applied per net namespace (see init_socket()). */
#define CAPWAP_FRAG_TIMEOUT (30 * HZ)
#define CAPWAP_FRAG_MAX_MEM (256 * 1024)
#define CAPWAP_FRAG_PRUNE_MEM (192 * 1024)
#define CAPWAP_FRAG_SECRET_INTERVAL (10 * 60 * HZ)

/*
 * The CAPWAP header is a mess, with all kinds of odd size bit fields that
 * cross byte boundaries, which are difficult to represent correctly in
 * various byte orderings. Luckily we only care about a few permutations, so
 * statically create them and we can do very fast parsing by checking all 12
 * fields in one go.
 */
#define CAPWAP_PREAMBLE_MASK __cpu_to_be32(0xFF000000)
#define CAPWAP_HLEN_SHIFT 17
#define CAPWAP_HLEN_MASK __cpu_to_be32(0x00F80000)
#define CAPWAP_RID_MASK __cpu_to_be32(0x0007C000)
#define CAPWAP_WBID_MASK __cpu_to_be32(0x00003E00)
#define CAPWAP_F_MASK __cpu_to_be32(0x000001FF)

/* Individual flag bits within the low 9-bit flags field of 'begin'. */
#define CAPWAP_F_FRAG __cpu_to_be32(0x00000080)
#define CAPWAP_F_LASTFRAG __cpu_to_be32(0x00000040)
#define CAPWAP_F_WSI __cpu_to_be32(0x00000020)
#define CAPWAP_F_RMAC __cpu_to_be32(0x00000010)

/* Size in bytes of the optional Radio MAC field when CAPWAP_F_RMAC is set. */
#define CAPWAP_RMAC_LEN 4

/* Standard CAPWAP looks for a WBID value of 2.
 * When we insert WSI field, use WBID value of 30, which has been
 * proposed for all "experimental" usage - users with no reserved WBID value
 * of their own.
 */
#define CAPWAP_WBID_30 __cpu_to_be32(0x00003C00)
#define CAPWAP_WBID_2 __cpu_to_be32(0x00000200)

#define FRAG_HDR (CAPWAP_F_FRAG)
#define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_F_LASTFRAG)

/* Keyed packet, WBID 30, and length long enough to include WSI key */
#define CAPWAP_KEYED (CAPWAP_WBID_30 | CAPWAP_F_WSI | htonl(20 << CAPWAP_HLEN_SHIFT))
/* A backward-compatible packet, WBID 2 and length of 2 words (no WSI fields) */
#define CAPWAP_NO_WSI (CAPWAP_WBID_2 | htonl(8 << CAPWAP_HLEN_SHIFT))

/* Mask for all parts of header that must be 0. */
#define CAPWAP_ZERO_MASK (CAPWAP_PREAMBLE_MASK | \
		(CAPWAP_F_MASK ^ (CAPWAP_F_WSI | CAPWAP_F_FRAG | CAPWAP_F_LASTFRAG | CAPWAP_F_RMAC)))
e90b1cf9
JG
80
/* On-the-wire CAPWAP transport header as handled by this module. */
struct capwaphdr {
	__be32 begin;    /* preamble, hlen, RID, WBID and flag bits packed together */
	__be16 frag_id;  /* identifies fragments belonging to one datagram */
	/* low 3 bits of frag_off are reserved */
	__be16 frag_off;
};

/*
 * We use the WSI field to hold additional tunnel data.
 * The first eight bits store the size of the wsi data in bytes.
 */
struct capwaphdr_wsi {
	u8 wsi_len;              /* WSI data length, NOT counting this byte */
	u8 flags;                /* CAPWAP_WSI_F_* bits */
	__be16 reserved_padding;
};

/* Optional WSI payload carrying the 64-bit tunnel key. */
struct capwaphdr_wsi_key {
	__be64 key;
};

/* Flag indicating a 64bit key is stored in WSI data field */
#define CAPWAP_WSI_F_KEY64 0x80
104
6455100f 105static struct capwaphdr *capwap_hdr(const struct sk_buff *skb)
e90b1cf9
JG
106{
107 return (struct capwaphdr *)(udp_hdr(skb) + 1);
108}
109
/*
 * The fragment offset is actually the high 13 bits of the last 16 bit field,
 * so we would normally need to right shift 3 places. However, it stores the
 * offset in 8 byte chunks, which would involve a 3 place left shift. So we
 * just mask off the last 3 bits and be done with it.
 */
#define FRAG_OFF_MASK (~0x7U)

/*
 * The minimum header length. The header may be longer if the optional
 * WSI field is used.
 */
#define CAPWAP_MIN_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr))
e90b1cf9
JG
123
/* Tuple identifying which reassembly queue a received fragment belongs to. */
struct frag_match {
	__be32 saddr;
	__be32 daddr;
	__be16 id;
};

/* Per-datagram reassembly queue; embeds the generic inet_frag_queue. */
struct frag_queue {
	struct inet_frag_queue ifq;
	struct frag_match match;
};

/* Per-skb scratch state stored in skb->cb while queued for reassembly. */
struct frag_skb_cb {
	u16 offset;
};
#define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb)
139
static struct sk_buff *fragment(struct sk_buff *, const struct vport *,
				struct dst_entry *dst, unsigned int hlen);
static struct sk_buff *defrag(struct sk_buff *, bool frag_last);

static void capwap_frag_init(struct inet_frag_queue *, void *match);
static unsigned int capwap_frag_hash(struct inet_frag_queue *);
static int capwap_frag_match(struct inet_frag_queue *, void *match);
static void capwap_frag_expire(unsigned long ifq);

/* Module-global reassembly callbacks shared by all namespaces; the
 * per-namespace limits live in capwap_net->frag_state (see init_socket()). */
static struct inet_frags frag_state = {
	.constructor = capwap_frag_init,
	.qsize = sizeof(struct frag_queue),
	.hashfn = capwap_frag_hash,
	.match = capwap_frag_match,
	.frag_expire = capwap_frag_expire,
	.secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
};
e90b1cf9 157
c19e6535 158static int capwap_hdr_len(const struct tnl_mutable_config *mutable)
e90b1cf9 159{
40a75177
VG
160 int size = CAPWAP_MIN_HLEN;
161
c19e6535
BP
162 /* CAPWAP has no checksums. */
163 if (mutable->flags & TNL_F_CSUM)
e90b1cf9
JG
164 return -EINVAL;
165
6455100f 166 /* if keys are specified, then add WSI field */
40a75177
VG
167 if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
168 size += sizeof(struct capwaphdr_wsi) +
169 sizeof(struct capwaphdr_wsi_key);
170 }
e90b1cf9 171
40a75177 172 return size;
e90b1cf9
JG
173}
174
842cf6f4
JG
/*
 * Fill in the static part of the UDP+CAPWAP encapsulation header in
 * @header.  Per-packet fields (udph->len, and the key when
 * TNL_F_OUT_KEY_ACTION is set) are completed in capwap_update_header().
 */
static void capwap_build_header(const struct vport *vport,
				const struct tnl_mutable_config *mutable,
				void *header)
{
	struct udphdr *udph = header;
	struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);

	udph->source = htons(CAPWAP_SRC_PORT);
	udph->dest = htons(CAPWAP_DST_PORT);
	udph->check = 0;	/* CAPWAP carries no UDP checksum */

	cwh->frag_id = 0;
	cwh->frag_off = 0;

	if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);

		cwh->begin = CAPWAP_KEYED;

		/* -1 for wsi_len byte, not included in length as per spec */
		wsi->wsi_len = sizeof(struct capwaphdr_wsi) - 1
			+ sizeof(struct capwaphdr_wsi_key);
		wsi->flags = CAPWAP_WSI_F_KEY64;
		wsi->reserved_padding = 0;

		/* A static key can be written now; a per-flow key is filled
		 * in later by capwap_update_header(). */
		if (mutable->out_key) {
			struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
			opt->key = mutable->out_key;
		}
	} else {
		/* make packet readable by old capwap code */
		cwh->begin = CAPWAP_NO_WSI;
	}
}
209
/*
 * Complete the per-packet header fields: the per-flow key (when
 * TNL_F_OUT_KEY_ACTION is set), the UDP length, and CAPWAP-level
 * fragmentation if the packet exceeds the path MTU.  May return a
 * linked chain of fragments, or NULL if fragmentation failed.
 */
static struct sk_buff *capwap_update_header(const struct vport *vport,
					    const struct tnl_mutable_config *mutable,
					    struct dst_entry *dst,
					    struct sk_buff *skb)
{
	struct udphdr *udph = udp_hdr(skb);

	if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
		/* first field in WSI is key */
		struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
		struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);

		opt->key = OVS_CB(skb)->tun_id;
	}

	udph->len = htons(skb->len - skb_transport_offset(skb));

	/* CAPWAP does its own fragmentation rather than relying on IP. */
	if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
		unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable);
		skb = fragment(skb, vport, dst, hlen);
	}

	return skb;
}
235
/*
 * Parse the optional WSI extension and extract the 64-bit tunnel key
 * into *key if one is present.  Only WBID-30 packets carry our WSI
 * layout; other WBIDs are accepted unchanged with *key untouched.
 * Returns 0 on success or -EINVAL for a malformed header.
 */
static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
{
	struct capwaphdr *cwh = capwap_hdr(skb);
	struct capwaphdr_wsi *wsi;
	int hdr_len;
	int rmac_len = 0;
	int wsi_len;

	if (((cwh->begin & CAPWAP_WBID_MASK) != CAPWAP_WBID_30))
		return 0;

	/* An RMAC field, if present, sits between the base header and WSI. */
	if (cwh->begin & CAPWAP_F_RMAC)
		rmac_len = CAPWAP_RMAC_LEN;

	hdr_len = ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT;

	/* Header must be long enough to hold at least an empty WSI. */
	if (unlikely(sizeof(struct capwaphdr) + rmac_len + sizeof(struct capwaphdr_wsi) > hdr_len))
		return -EINVAL;

	/* read wsi header to find out how big it really is */
	wsi = (struct capwaphdr_wsi *)((u8 *)(cwh + 1) + rmac_len);
	/* +1 for length byte not included in wsi_len */
	wsi_len = 1 + wsi->wsi_len;

	/* WSI length must exactly account for the declared header length. */
	if (unlikely(sizeof(struct capwaphdr) + rmac_len + wsi_len != hdr_len))
		return -EINVAL;

	wsi_len -= sizeof(struct capwaphdr_wsi);

	if (wsi->flags & CAPWAP_WSI_F_KEY64) {
		struct capwaphdr_wsi_key *opt;

		if (unlikely(wsi_len < sizeof(struct capwaphdr_wsi_key)))
			return -EINVAL;

		opt = (struct capwaphdr_wsi_key *)(wsi + 1);
		*key = opt->key;
	}

	return 0;
}
277
/*
 * Validate and strip the CAPWAP header, reassembling fragments and
 * extracting any WSI tunnel key into *key.  Consumes @skb on error
 * (returns NULL); otherwise returns the possibly-reassembled skb with
 * the encapsulation header pulled off.
 */
static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
{
	struct capwaphdr *cwh = capwap_hdr(skb);
	int hdr_len = sizeof(struct udphdr);

	/* All reserved header bits must be zero. */
	if (unlikely((cwh->begin & CAPWAP_ZERO_MASK) != 0))
		goto error;

	hdr_len += ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT;
	if (unlikely(hdr_len < CAPWAP_MIN_HLEN))
		goto error;

	if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
		goto error;

	/* pskb_may_pull() may have reallocated; re-fetch the header pointer. */
	cwh = capwap_hdr(skb);
	__skb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);

	if (cwh->begin & CAPWAP_F_FRAG) {
		/* defrag() consumes skb and returns NULL until the datagram
		 * is complete. */
		skb = defrag(skb, (__force bool)(cwh->begin & CAPWAP_F_LASTFRAG));
		if (!skb)
			return NULL;
		cwh = capwap_hdr(skb);
	}

	if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key))
		goto error;

	return skb;
error:
	kfree_skb(skb);
	return NULL;
}
312
/* Called with rcu_read_lock and BH disabled. */
/*
 * UDP encap_rcv handler: decapsulate the CAPWAP packet, look up the
 * destination vport by outer addresses plus key, and hand the inner
 * frame to the generic tunnel receive path.  Always returns 0; the skb
 * is consumed on every path.
 */
static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct vport *vport;
	const struct tnl_mutable_config *mutable;
	struct iphdr *iph;
	__be64 key = 0;

	if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
		goto error;

	skb = process_capwap_proto(skb, &key);
	if (unlikely(!skb))
		goto out;

	iph = ip_hdr(skb);
	vport = ovs_tnl_find_port(sock_net(sk), iph->daddr, iph->saddr, key,
				  TNL_T_PROTO_CAPWAP, &mutable);
	if (unlikely(!vport)) {
		/* No matching tunnel; tell the sender the port is closed. */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		goto error;
	}

	/* In key-match mode the received key becomes the flow's tun_id. */
	if (mutable->flags & TNL_F_IN_KEY_MATCH)
		OVS_CB(skb)->tun_id = key;
	else
		OVS_CB(skb)->tun_id = 0;

	ovs_tnl_rcv(vport, skb, iph->tos);
	goto out;

error:
	kfree_skb(skb);
out:
	return 0;
}
349
/* Tunnel-layer callbacks used by the generic OVS tunnel code. */
static const struct tnl_ops capwap_tnl_ops = {
	.tunnel_type = TNL_T_PROTO_CAPWAP,
	.ipproto = IPPROTO_UDP,
	.hdr_len = capwap_hdr_len,
	.build_header = capwap_build_header,
	.update_header = capwap_update_header,
};
357
2a4999f3 358static inline struct capwap_net *ovs_get_capwap_net(struct net *net)
e90b1cf9 359{
2a4999f3
PS
360 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
361 return &ovs_net->vport_net.capwap;
e90b1cf9
JG
362}
363
/* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */
#define UDP_ENCAP_CAPWAP 10
/*
 * On first use in a namespace, create and bind the UDP receive socket,
 * hook capwap_rcv() in as its encap handler, and initialize the
 * namespace fragment state; later callers only bump the tunnel count.
 * Returns 0 or a negative errno.
 */
static int init_socket(struct net *net)
{
	int err;
	struct capwap_net *capwap_net = ovs_get_capwap_net(net);
	struct sockaddr_in sin;

	/* Socket already exists in this namespace; just take a reference. */
	if (capwap_net->n_tunnels) {
		capwap_net->n_tunnels++;
		return 0;
	}

	err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
			       &capwap_net->capwap_rcv_socket);
	if (err)
		goto error;

	/* release net ref. */
	sk_change_net(capwap_net->capwap_rcv_socket->sk, net);

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(CAPWAP_DST_PORT);

	err = kernel_bind(capwap_net->capwap_rcv_socket,
			  (struct sockaddr *)&sin,
			  sizeof(struct sockaddr_in));
	if (err)
		goto error_sock;

	/* Divert every datagram arriving on this socket to capwap_rcv(). */
	udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
	udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_rcv = capwap_rcv;

	capwap_net->frag_state.timeout = CAPWAP_FRAG_TIMEOUT;
	capwap_net->frag_state.high_thresh = CAPWAP_FRAG_MAX_MEM;
	capwap_net->frag_state.low_thresh = CAPWAP_FRAG_PRUNE_MEM;

	inet_frags_init_net(&capwap_net->frag_state);

	capwap_net->n_tunnels++;
	return 0;

error_sock:
	sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
error:
	pr_warn("cannot register capwap protocol handler : %d\n", err);
	return err;
}
413
2a4999f3
PS
/*
 * Drop one tunnel reference on the per-namespace socket; on the last
 * reference, tear down the fragment state and release the socket.
 */
static void release_socket(struct net *net)
{
	struct capwap_net *capwap_net = ovs_get_capwap_net(net);

	capwap_net->n_tunnels--;
	if (capwap_net->n_tunnels)
		return;

	inet_frags_exit_net(&capwap_net->frag_state, &frag_state);
	sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
}
425
426static struct vport *capwap_create(const struct vport_parms *parms)
427{
428 struct vport *vport;
429 int err;
430
431 err = init_socket(ovs_dp_get_net(parms->dp));
432 if (err)
433 return ERR_PTR(err);
434
435 vport = ovs_tnl_create(parms, &ovs_capwap_vport_ops, &capwap_tnl_ops);
436 if (IS_ERR(vport))
437 release_socket(ovs_dp_get_net(parms->dp));
438
439 return vport;
440}
441
442static void capwap_destroy(struct vport *vport)
443{
444 ovs_tnl_destroy(vport);
445 release_socket(ovs_dp_get_net(vport->dp));
446}
447
/* Module-wide init: register the shared fragment-reassembly state. */
static int capwap_init(void)
{
	inet_frags_init(&frag_state);
	return 0;
}
453
e90b1cf9
JG
/* Module-wide teardown: unregister the fragment-reassembly state. */
static void capwap_exit(void)
{
	inet_frags_fini(&frag_state);
}
458
459static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to)
460{
461 to->pkt_type = from->pkt_type;
462 to->priority = from->priority;
463 to->protocol = from->protocol;
464 skb_dst_set(to, dst_clone(skb_dst(from)));
465 to->dev = from->dev;
466 to->mark = from->mark;
467
468 if (from->sk)
469 skb_set_owner_w(to, from->sk);
470
471#ifdef CONFIG_NET_SCHED
472 to->tc_index = from->tc_index;
473#endif
474#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
475 to->ipvs_property = from->ipvs_property;
476#endif
477 skb_copy_secmark(to, from);
478}
479
/*
 * CAPWAP-level fragmentation: split @skb so each piece fits within
 * dst_mtu(dst).  @hlen is the full encapsulation header length, which
 * is replicated at the front of every fragment.  Returns a singly
 * linked chain (via skb->next) and consumes the original skb, or frees
 * everything and returns NULL on failure.
 */
static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
				struct dst_entry *dst, unsigned int hlen)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	unsigned int headroom;
	unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb);
	struct sk_buff *result = NULL, *list_cur = NULL;
	unsigned int remaining;
	unsigned int offset;
	__be16 frag_id;

	/* Must fit at least one minimum-size (8 byte) payload chunk. */
	if (hlen + ~FRAG_OFF_MASK + 1 > max_frame_len) {
		if (net_ratelimit())
			pr_warn("capwap link mtu (%d) is less than minimum packet (%d)\n",
				dst_mtu(dst),
				hlen - skb_network_offset(skb) + ~FRAG_OFF_MASK + 1);
		goto error;
	}

	remaining = skb->len - hlen;
	offset = 0;
	frag_id = htons(atomic_inc_return(&tnl_vport->frag_id));

	headroom = dst->header_len + 16;
	if (!skb_network_offset(skb))
		headroom += LL_RESERVED_SPACE(dst->dev);

	while (remaining) {
		struct sk_buff *skb2;
		int frag_size;
		struct udphdr *udph;
		struct capwaphdr *cwh;

		/* All but the last fragment must be a multiple of 8 bytes. */
		frag_size = min(remaining, max_frame_len - hlen);
		if (remaining > frag_size)
			frag_size &= FRAG_OFF_MASK;

		skb2 = alloc_skb(headroom + hlen + frag_size, GFP_ATOMIC);
		if (!skb2)
			goto error;

		skb_reserve(skb2, headroom);
		__skb_put(skb2, hlen + frag_size);

		if (skb_network_offset(skb))
			skb_reset_mac_header(skb2);
		skb_set_network_header(skb2, skb_network_offset(skb));
		skb_set_transport_header(skb2, skb_transport_offset(skb));

		/* Copy (Ethernet)/IP/UDP/CAPWAP header. */
		copy_skb_metadata(skb, skb2);
		skb_copy_from_linear_data(skb, skb2->data, hlen);

		/* Copy this data chunk. */
		if (skb_copy_bits(skb, hlen + offset, skb2->data + hlen, frag_size))
			BUG();

		udph = udp_hdr(skb2);
		udph->len = htons(skb2->len - skb_transport_offset(skb2));

		cwh = capwap_hdr(skb2);
		if (remaining > frag_size)
			cwh->begin |= FRAG_HDR;
		else
			cwh->begin |= FRAG_LAST_HDR;
		cwh->frag_id = frag_id;
		cwh->frag_off = htons(offset);

		if (result) {
			list_cur->next = skb2;
			list_cur = skb2;
		} else
			result = list_cur = skb2;

		offset += frag_size;
		remaining -= frag_size;
	}

	consume_skb(skb);
	return result;

error:
	ovs_tnl_free_linked_skbs(result);
	kfree_skb(skb);
	return NULL;
}
566
567/* All of the following functions relate to fragmentation reassembly. */
568
/* Map the embedded inet_frag_queue back to its containing frag_queue. */
static struct frag_queue *ifq_cast(struct inet_frag_queue *ifq)
{
	return container_of(ifq, struct frag_queue, ifq);
}
573
574static u32 frag_hash(struct frag_match *match)
575{
576 return jhash_3words((__force u16)match->id, (__force u32)match->saddr,
577 (__force u32)match->daddr,
578 frag_state.rnd) & (INETFRAGS_HASHSZ - 1);
579}
580
2a4999f3
PS
581static struct frag_queue *queue_find(struct netns_frags *ns_frag_state,
582 struct frag_match *match)
e90b1cf9
JG
583{
584 struct inet_frag_queue *ifq;
585
586 read_lock(&frag_state.lock);
587
2a4999f3 588 ifq = inet_frag_find(ns_frag_state, &frag_state, match, frag_hash(match));
e90b1cf9
JG
589 if (!ifq)
590 return NULL;
591
592 /* Unlock happens inside inet_frag_find(). */
593
594 return ifq_cast(ifq);
595}
596
/*
 * Glue a completed fragment queue back into one skb: the first fragment
 * keeps the headers and the rest become its frag_list.  The queue is
 * killed whether reassembly succeeds or not.  Returns the reassembled
 * skb or NULL on failure.
 */
static struct sk_buff *frag_reasm(struct frag_queue *fq, struct net_device *dev)
{
	struct sk_buff *head = fq->ifq.fragments;
	struct sk_buff *frag;

	/* Succeed or fail, we're done with this queue. */
	inet_frag_kill(&fq->ifq, &frag_state);

	if (fq->ifq.len > 65535)
		return NULL;

	/* Can't have the head be a clone. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		return NULL;

	/*
	 * We're about to build frag list for this SKB. If it already has a
	 * frag list, alloc a new SKB and put the existing frag list there.
	 */
	if (skb_shinfo(head)->frag_list) {
		int i;
		int paged_len = 0;

		frag = alloc_skb(0, GFP_ATOMIC);
		if (!frag)
			return NULL;

		frag->next = head->next;
		head->next = frag;
		skb_shinfo(frag)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;

		/* Move the frag_list's share of head's length onto frag. */
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			paged_len += skb_shinfo(head)->frags[i].size;
		frag->len = frag->data_len = head->data_len - paged_len;
		head->data_len -= frag->len;
		head->len -= frag->len;

		frag->ip_summed = head->ip_summed;
		atomic_add(frag->truesize, &fq->ifq.net->mem);
	}

	skb_shinfo(head)->frag_list = head->next;
	atomic_sub(head->truesize, &fq->ifq.net->mem);

	/* Properly account for data in various packets. */
	for (frag = head->next; frag; frag = frag->next) {
		head->data_len += frag->len;
		head->len += frag->len;

		if (head->ip_summed != frag->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, frag->csum);

		head->truesize += frag->truesize;
		atomic_sub(frag->truesize, &fq->ifq.net->mem);
	}

	head->next = NULL;
	head->dev = dev;
	head->tstamp = fq->ifq.stamp;
	fq->ifq.fragments = NULL;

	return head;
}
663
/*
 * Insert one fragment (starting at byte @offset) into queue @fq.
 * Overlapping or inconsistent fragments are dropped.  Returns the
 * fully reassembled skb once every piece has arrived, otherwise NULL.
 * Called with the queue lock held; consumes @skb on error.
 */
static struct sk_buff *frag_queue(struct frag_queue *fq, struct sk_buff *skb,
				  u16 offset, bool frag_last)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int end;

	if (fq->ifq.last_in & INET_FRAG_COMPLETE)
		goto error;

	if (!skb->len)
		goto error;

	end = offset + skb->len;

	if (frag_last) {
		/*
		 * Last fragment, shouldn't already have data past our end or
		 * have another last fragment.
		 */
		if (end < fq->ifq.len || fq->ifq.last_in & INET_FRAG_LAST_IN)
			goto error;

		fq->ifq.last_in |= INET_FRAG_LAST_IN;
		fq->ifq.len = end;
	} else {
		/* Fragments should align to 8 byte chunks. */
		if (end & ~FRAG_OFF_MASK)
			goto error;

		if (end > fq->ifq.len) {
			/*
			 * Shouldn't have data past the end, if we already
			 * have one.
			 */
			if (fq->ifq.last_in & INET_FRAG_LAST_IN)
				goto error;

			fq->ifq.len = end;
		}
	}

	/* Find where we fit in. */
	prev = NULL;
	for (next = fq->ifq.fragments; next != NULL; next = next->next) {
		if (FRAG_CB(next)->offset >= offset)
			break;
		prev = next;
	}

	/*
	 * Overlapping fragments aren't allowed. We shouldn't start before
	 * the end of the previous fragment.
	 */
	if (prev && FRAG_CB(prev)->offset + prev->len > offset)
		goto error;

	/* We also shouldn't end after the beginning of the next fragment. */
	if (next && end > FRAG_CB(next)->offset)
		goto error;

	FRAG_CB(skb)->offset = offset;

	/* Link into list. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->ifq.fragments = skb;

	dev = skb->dev;
	skb->dev = NULL;

	fq->ifq.stamp = skb->tstamp;
	fq->ifq.meat += skb->len;
	atomic_add(skb->truesize, &fq->ifq.net->mem);
	if (offset == 0)
		fq->ifq.last_in |= INET_FRAG_FIRST_IN;

	/* If we have all fragments do reassembly. */
	if (fq->ifq.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->ifq.meat == fq->ifq.len)
		return frag_reasm(fq, dev);

	/* Still incomplete: mark this queue most recently used. */
	write_lock(&frag_state.lock);
	list_move_tail(&fq->ifq.lru_list, &fq->ifq.net->lru_list);
	write_unlock(&frag_state.lock);

	return NULL;

error:
	kfree_skb(skb);
	return NULL;
}
758
/*
 * Entry point for received CAPWAP fragments: evict old queues when over
 * the per-namespace memory limit, find or create the matching queue,
 * and feed @skb into it.  Returns the reassembled skb when complete,
 * otherwise NULL (the skb has been queued or freed).
 */
static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last)
{
	struct iphdr *iph = ip_hdr(skb);
	struct capwaphdr *cwh = capwap_hdr(skb);
	struct capwap_net *capwap_net = ovs_get_capwap_net(dev_net(skb->dev));
	struct netns_frags *ns_frag_state = &capwap_net->frag_state;
	struct frag_match match;
	u16 frag_off;
	struct frag_queue *fq;

	if (atomic_read(&ns_frag_state->mem) > ns_frag_state->high_thresh)
		inet_frag_evictor(ns_frag_state, &frag_state);

	match.daddr = iph->daddr;
	match.saddr = iph->saddr;
	match.id = cwh->frag_id;
	/* Offset is carried in 8-byte units; low 3 bits are reserved. */
	frag_off = ntohs(cwh->frag_off) & FRAG_OFF_MASK;

	fq = queue_find(ns_frag_state, &match);
	if (fq) {
		spin_lock(&fq->ifq.lock);
		skb = frag_queue(fq, skb, frag_off, frag_last);
		spin_unlock(&fq->ifq.lock);

		inet_frag_put(&fq->ifq, &frag_state);

		return skb;
	}

	kfree_skb(skb);
	return NULL;
}
791
e90b1cf9
JG
792static void capwap_frag_init(struct inet_frag_queue *ifq, void *match_)
793{
794 struct frag_match *match = match_;
795
796 ifq_cast(ifq)->match = *match;
797}
798
/* inet_frags hash callback: hash a queue by its stored match tuple. */
static unsigned int capwap_frag_hash(struct inet_frag_queue *ifq)
{
	return frag_hash(&ifq_cast(ifq)->match);
}
803
804static int capwap_frag_match(struct inet_frag_queue *ifq, void *a_)
805{
806 struct frag_match *a = a_;
807 struct frag_match *b = &ifq_cast(ifq)->match;
808
809 return a->id == b->id && a->saddr == b->saddr && a->daddr == b->daddr;
810}
811
/* Run when the timeout for a given queue expires. */
static void capwap_frag_expire(unsigned long ifq)
{
	struct frag_queue *fq;

	fq = ifq_cast((struct inet_frag_queue *)ifq);

	spin_lock(&fq->ifq.lock);

	/* Only kill the queue if reassembly hasn't already completed. */
	if (!(fq->ifq.last_in & INET_FRAG_COMPLETE))
		inet_frag_kill(&fq->ifq, &frag_state);

	spin_unlock(&fq->ifq.lock);
	/* Drop the timer's reference on the queue. */
	inet_frag_put(&fq->ifq, &frag_state);
}
827
/* vport ops table registered with the datapath for CAPWAP tunnel ports. */
const struct vport_ops ovs_capwap_vport_ops = {
	.type = OVS_VPORT_TYPE_CAPWAP,
	.flags = VPORT_F_TUN_ID,
	.init = capwap_init,
	.exit = capwap_exit,
	.create = capwap_create,
	.destroy = capwap_destroy,
	.set_addr = ovs_tnl_set_addr,
	.get_name = ovs_tnl_get_name,
	.get_addr = ovs_tnl_get_addr,
	.get_options = ovs_tnl_get_options,
	.set_options = ovs_tnl_set_options,
	.get_dev_flags = ovs_vport_gen_get_dev_flags,
	.is_running = ovs_vport_gen_is_running,
	.get_operstate = ovs_vport_gen_get_operstate,
	.send = ovs_tnl_send,
};
9d9a0a04
JG
845#else
846#warning CAPWAP tunneling will not be available on kernels before 2.6.26
847#endif /* Linux kernel < 2.6.26 */